| 1 |
#' subset helper function for use reading in large data, called in pcv.sub.read |
|
| 2 |
#' |
|
| 3 |
#' |
|
| 4 |
#' |
|
| 5 |
#' @param inputFile Path to csv file of plantCV output, should be provided internally in read.pcv |
|
| 6 |
#' @param filters filtering conditions, see read.pcv for details. Format as |
|
| 7 |
#' list("trait in area, perimeter", "other contains stringToMatch")
|
|
| 8 |
#' @param awk Optional awk command to use instead. |
|
| 9 |
#' @keywords read.csv pcv wide long |
|
| 10 |
#' @details awkHelper attempts to make awk commands from human readable input. |
|
| 11 |
#' Currently when filters are supplied the input file has quotes removed by `sed` |
|
| 12 |
#' then is piped into awk, so an equivalent command line statement may be: |
|
| 13 |
#' \code{sed 's/\"//g' pcvrTest2.csv | awk -F ',' '{ if (NR==1 || $18=="area") { print } }'}
|
|
| 14 |
#' @return Returns a character string representing a unix style awk statement |
|
| 15 |
#' which is typically passed to \code{pipe} or used as a connection in \code{data.table::fread}.
|
|
| 16 |
#' @importFrom utils read.csv capture.output |
|
| 17 |
#' @examples |
|
| 18 |
#' link1 <- "https://gist.githubusercontent.com/seankross/" |
|
| 19 |
#' link2 <- "a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv" |
|
| 20 |
#' file <- paste0(link1, link2) |
|
| 21 |
#' awkHelper(file, list("gear in 4, 3"), awk = NULL)
|
|
| 22 |
#' awkHelper(file, "gear contains 3", awk = NULL) |
|
| 23 |
#' # note that to be filtered the file has to exist on your local system, this example is only to show |
|
| 24 |
#' # the output of awkHelper, which would then be executed by read.pcv |
|
| 25 |
#' awkHelper(file, list("gear in 4, 3"), awk = "existing_command")
|
|
| 26 |
#' |
|
| 27 |
#' @export |
|
| 28 |
awkHelper <- function(inputFile, filters, awk = NULL) {
  # If the caller supplied a ready-made awk command, pass it through untouched.
  if (!is.null(awk)) {
    return(awk)
  }
  if (!is.list(filters)) {
    filters <- list(filters)
  }
  # Strip quotes with sed before piping the file into awk.
  sedPart <- paste0("sed 's/\"//g' ", inputFile, " | ")
  # Read just the header row so filter columns can be mapped to awk field numbers.
  header <- colnames(read.csv(inputFile, nrows = 1))
  # Translate each human-readable filter ("col in a, b" / "col contains x")
  # into an awk condition on the matching field.
  conditions <- lapply(filters, function(filt) {
    tokens <- strsplit(filt, " ")[[1]]
    column <- tokens[1]
    verb <- tokens[2]
    vals <- trimws(gsub(",", " ", tokens[-c(1:2)]))
    fieldNum <- which(header == column)
    if (verb %in% c("in", "is", "=")) {
      # exact match against any of the listed values
      paste(paste0("($", fieldNum, '=="', vals, '")'), collapse = " || ")
    } else if (verb == "contains") {
      # regex match against any of the listed values
      paste0("($", fieldNum, " ~ /", paste0(vals, collapse = "|"), "/)")
    }
  })
  # Combine filters with logical AND, each wrapped in parentheses.
  combined <- paste(paste("(", conditions, ")"), collapse = " && ")
  capture.output(
    cat(sedPart, "awk -F ", "',' ", "'{ if (", combined, ") { print } }'")
  )
}
| 1 |
#' @description |
|
| 2 |
#' Internal function for calculating \mu and \sigma of the normally distributed mean of lognormal data |
|
| 3 |
#' given an estimate of the lognormal \sigma obtained via the method of moments using multi value |
|
| 4 |
#' traits. |
|
| 5 |
#' @param s1 A data.frame or matrix of multi value traits. The column names should include a number |
|
| 6 |
#' representing the "bin". |
|
| 7 |
#' |
|
| 8 |
#' @examples |
|
| 9 |
#' |
|
| 10 |
#' mv_ln <- mvSim( |
|
| 11 |
#' dists = list( |
|
| 12 |
#' rlnorm = list(meanlog = log(130), sdlog = log(1.2)) |
|
| 13 |
#' ), |
|
| 14 |
#' n_samples = 30 |
|
| 15 |
#' ) |
|
| 16 |
#' .conj_lognormal_mv( |
|
| 17 |
#' s1 = mv_ln[1:30, -1], |
|
| 18 |
#' priors = list(mu = c(log(10)), sd = c(log(3))), |
|
| 19 |
#' plot = FALSE, cred.int.level = 0.9 |
|
| 20 |
#' ) |
|
| 21 |
#' |
|
| 22 |
#' @importFrom stats qnorm |
|
| 23 |
#' @keywords internal |
|
| 24 |
#' @noRd |
|
| 25 | ||
| 26 |
.conj_lognormal_mv <- function(s1 = NULL, priors = NULL,
                               plot = FALSE, support = NULL, cred.int.level = NULL,
                               calculatingSupport = FALSE) {
  out <- list()
  #* `make default prior if none provided`
  if (is.null(priors)) {
    priors <- list(mu = 0, sd = 5)
  }
  #* `Reorder columns if they are not in the numeric order`
  histColsBin <- as.numeric(sub("[a-zA-Z_.]+", "", colnames(s1)))
  bins_order <- sort(histColsBin, index.return = TRUE)$ix
  s1 <- s1[, bins_order]
  #* `Prior hyperparameters (loop invariant, hoisted out of the per-rep loop)`
  m <- priors$mu[1]
  p <- 1 / (priors$sd[1]^2) # precision
  #* `Loop over reps, get moments for each histogram`
  rep_distributions <- lapply(seq_len(nrow(s1)), function(i) {
    # Expand the histogram row back into pseudo-observations: each bin value
    # repeated by its count.
    X1 <- rep(histColsBin[bins_order], as.numeric(s1[i, ]))
    #* `Method-of-moments estimates of the lognormal parameters of X1`
    x_bar <- mean(X1)
    # mu = log(mean / sqrt(1 + var/mean^2)); the "+ 1" belongs inside the sqrt.
    # (Previously it was added to the sqrt's result, biasing mu downward
    # whenever var(X1) > 0.)
    mu_x1 <- log(x_bar / sqrt(var(X1) / x_bar^2 + 1))
    # sigma^2 = log(1 + var/mean^2)
    sigma_x1 <- sqrt(log((var(X1)) / (x_bar^2) + 1))
    #* `Update Normal Distribution of Mu`
    #* sufficient stats: n, mean of log data | precision
    # NOTE(review): the data term uses the prior precision p rather than an
    # independent data precision; confirm this matches the intended model.
    n <- length(X1)
    mu_prime <- ((m * p) + (n * p * mu_x1)) / (p + (n * p))
    precision_prime <- (p + (n * p))
    sd_prime <- sqrt(1 / precision_prime)
    list("mu" = mu_prime, "sd" = sd_prime, "ln_sd" = sigma_x1)
  })
  #* `Average per-rep posterior parameters across histograms`
  mu_ls_prime <- mean(vapply(rep_distributions, function(i) i$mu, numeric(1)))
  sigma_ls_prime <- mean(vapply(rep_distributions, function(i) i$sd, numeric(1)))
  ln_sigma_prime <- mean(vapply(rep_distributions, function(i) i$ln_sd, numeric(1)))
  #* `Define support if it is missing`
  if (is.null(support) && calculatingSupport) {
    quantiles <- stats::qnorm(c(0.0001, 0.9999), mu_ls_prime, sigma_ls_prime)
    return(quantiles)
  }
  #* `posterior`
  dens1 <- stats::dnorm(support, mu_ls_prime, sigma_ls_prime)
  pdf1 <- dens1 / sum(dens1)
  hde1 <- mu_ls_prime
  hdi1 <- stats::qnorm(
    c(
      (1 - cred.int.level) / 2,
      (1 - ((1 - cred.int.level) / 2))
    ),
    mu_ls_prime, sigma_ls_prime
  )
  #* `Store summary`
  out$summary <- data.frame(HDE_1 = hde1, HDI_1_low = hdi1[1], HDI_1_high = hdi1[2])
  out$posterior$mu <- mu_ls_prime
  out$posterior$sd <- sigma_ls_prime
  out$posterior$lognormal_sigma <- ln_sigma_prime
  #* `Make Posterior Draws`
  out$posteriorDraws <- stats::rnorm(10000, mu_ls_prime, sigma_ls_prime)
  out$pdf <- pdf1
  #* `save s1 data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support,
      "prob" = pdf1,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(out)
}
|
| 104 | ||
| 105 | ||
| 106 | ||
| 107 |
#' @description |
|
| 108 |
#' Internal function for calculating \mu and \sigma of the normally distributed mean of lognormal data |
|
| 109 |
#' given an estimate of the lognormal \sigma obtained via the method of moments using single value |
|
| 110 |
#' traits. |
|
| 111 |
#' |
|
| 112 |
#' @param s1 A vector of numerics drawn from a lognormal distribution. |
|
| 113 |
#' @examples |
|
| 114 |
#' .conj_lognormal_sv( |
|
| 115 |
#' s1 = rlnorm(100, log(130), log(1.3)), |
|
| 116 |
#' priors = list(mu = 5, sd = 5), |
|
| 117 |
#' plot = FALSE, |
|
| 118 |
#' cred.int.level = 0.89 |
|
| 119 |
#' ) |
|
| 120 |
#' @keywords internal |
|
| 121 |
#' @noRd |
|
| 122 | ||
| 123 |
.conj_lognormal_sv <- function(s1 = NULL, priors = NULL,
                               plot = FALSE, support = NULL, cred.int.level = NULL,
                               calculatingSupport = FALSE) {
  out <- list()
  #* `make default prior if none provided`
  if (is.null(priors)) {
    priors <- list(mu = 0, sd = 5)
  }
  #* `Method-of-moments estimates of the lognormal parameters of s1`
  x_bar <- mean(s1)
  # mu = log(mean / sqrt(1 + var/mean^2)); the "+ 1" belongs inside the sqrt.
  # (Previously it was added to the sqrt's result, biasing mu downward
  # whenever var(s1) > 0.)
  mu_s1 <- log(x_bar / sqrt(var(s1) / x_bar^2 + 1))
  # sigma^2 = log(1 + var/mean^2)
  sigma_s1 <- sqrt(log((var(s1)) / (x_bar^2) + 1))
  #* `Update Normal Distribution of Mu`
  #* sufficient stats: n, mean of log data | precision
  # NOTE(review): the data term uses the prior precision p rather than an
  # independent data precision; confirm this matches the intended model.
  n <- length(s1)
  m <- priors$mu[1]
  p <- 1 / (priors$sd[1]^2) # precision
  mu_prime <- ((m * p) + (n * p * mu_s1)) / (p + (n * p))
  precision_prime <- (p + (n * p))
  sd_prime <- sqrt(1 / precision_prime)
  #* `Define support if it is missing`
  if (is.null(support) && calculatingSupport) {
    quantiles <- stats::qnorm(c(0.0001, 0.9999), mu_prime, sd_prime)
    return(quantiles)
  }
  #* `posterior` (stats:: qualified for consistency with .conj_lognormal_mv)
  dens1 <- stats::dnorm(support, mu_prime, sd_prime)
  pdf1 <- dens1 / sum(dens1)
  hde1 <- mu_prime
  hdi1 <- stats::qnorm(
    c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
    mu_prime, sd_prime
  )
  #* `Store summary`
  out$summary <- data.frame(HDE_1 = hde1, HDI_1_low = hdi1[1], HDI_1_high = hdi1[2])
  out$posterior$mu <- mu_prime
  out$posterior$sd <- sd_prime
  out$posterior$lognormal_sigma <- sigma_s1 # returning this as a number, not a distribution
  #* `Make Posterior Draws`
  out$posteriorDraws <- stats::rnorm(10000, mu_prime, sd_prime)
  out$pdf <- pdf1
  #* `save s1 data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support,
      "prob" = pdf1,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(out)
}
| 1 |
#' @description |
|
| 2 |
#' Internal function for calculating the joint posterior of the mean and precision of |

| 3 |
#' log-scale (lognormal) data represented by single value traits. |
|
| 4 |
#' @param s1 A vector of numerics drawn from a lognormal distribution. |
|
| 5 |
#' @examples |
|
| 6 |
#' out <- .conj_bivariate_lognormal_sv( |
|
| 7 |
#' s1 = rlnorm(10, log(20), 1), cred.int.level = 0.95, |
|
| 8 |
#' plot = FALSE |
|
| 9 |
#' ) |
|
| 10 |
#' lapply(out, head) |
|
| 11 |
#' |
|
| 12 |
#' @keywords internal |
|
| 13 |
#' @noRd |
|
| 14 | ||
| 15 |
.conj_bivariate_lognormal_sv <- function(s1 = NULL, priors = NULL,
                                         plot = FALSE, support = NULL, cred.int.level = NULL,
                                         calculatingSupport = FALSE) {
  out <- list()
  #* `make default prior if none provided`
  #* conjugate prior needs alpha, beta, mu, prec (or var or sd)
  #* precision is Gamma(alpha, beta)
  #* mu is T_[2*alpha](mu, precision)
  if (is.null(priors)) {
    priors <- list(mu = 0, sd = 10, a = 1, b = 1)
  }
  #* `Extract prior components`
  alpha <- priors$a[1]
  beta <- priors$b[1]
  mu <- priors$mu[1]
  # [1] added for consistency with the other hyperparameters, so a vector
  # prior sd cannot silently produce vector-valued updates.
  prec <- 1 / (priors$sd[1]^2)
  #* `Calculate sufficient statistics (on the log scale, since s1 is lognormal)`
  n <- length(s1)
  log_s1 <- log(s1)
  x_bar <- mean(log_s1)
  # Sum of squared deviations of the LOG data about its mean. Previously the
  # raw-scale s1 was differenced against the log-scale mean, mixing scales.
  ss <- sum((log_s1 - x_bar)^2)
  #* `Update priors with sufficient statistics`
  alpha_prime <- alpha + (n / 2)
  beta_prime <- 1 / ((1 / beta) + (ss / 2) + ((prec * n * ((x_bar - mu)^2)) / (2 * (prec + n))))
  mu_prime <- ((prec * mu) + (n * x_bar)) / (prec + n)
  prec_prime <- prec + n
  df_prime <- 2 * alpha_prime
  prec_prime_t <- alpha_prime * prec_prime * beta_prime
  sigma_prime <- sqrt(1 / prec_prime_t)
  #* `Define bivariate support if it is missing`
  if (is.null(support)) {
    quantiles_mu <- extraDistr::qlst(c(0.0001, 0.9999), df_prime, mu_prime, sigma_prime)
    quantiles_prec <- stats::qgamma(c(0.0001, 0.9999), shape = alpha_prime, scale = beta_prime)
    support_mu <- seq(quantiles_mu[1], quantiles_mu[2], length.out = 10000)
    support_prec <- seq(quantiles_prec[1], quantiles_prec[2], length.out = 10000)
    if (calculatingSupport) {
      return(list("Mu" = quantiles_mu, "Prec" = quantiles_prec))
    }
  } else {
    support_mu <- support$Mu
    support_prec <- support$Prec
  }
  #* `Make Posterior Draws`
  out$posteriorDraws <- .conj_biv_rough_sampling(
    10000, alpha_prime, beta_prime,
    mu_prime, sigma_prime, df_prime
  )
  #* `posterior`
  dens_mu <- extraDistr::dlst(support_mu, df_prime, mu_prime, sigma_prime)
  dens_prec <- stats::dgamma(support_prec, shape = alpha_prime, scale = beta_prime)

  pdf_mu <- dens_mu / sum(dens_mu)
  pdf_prec <- dens_prec / sum(dens_prec)
  out$pdf <- list("Mu" = pdf_mu, "Prec" = pdf_prec)

  hde_mu <- mu_prime
  hde_prec <- .gammaHDE(shape = alpha_prime, scale = beta_prime)
  #* `Credible intervals` — returned as c(low, high) directly. The previous
  #* `-1 * rev(...)` wrapper negated the quantiles, which yielded negative
  #* interval bounds (including impossible negative precision bounds).
  hdi_mu <- extraDistr::qlst(
    c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
    df_prime, mu_prime, sigma_prime
  )
  hdi_prec <- stats::qgamma(
    c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
    shape = alpha_prime, scale = beta_prime
  )

  #* `Store summary`
  out$summary <- data.frame(
    HDE_1 = c(hde_mu, hde_prec),
    HDI_1_low = c(hdi_mu[1], hdi_prec[1]),
    HDI_1_high = c(hdi_mu[2], hdi_prec[2]),
    param = c("Mu", "Prec")
  )
  out$posterior <- list(
    "mu" = mu_prime, "sd" = sigma_prime,
    "a" = alpha_prime, "b" = beta_prime
  )
  #* `save s1 data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = c(support_mu, support_prec),
      "prob" = c(pdf_mu, pdf_prec),
      "param" = rep(c("Mu", "Prec"), each = length(support_mu)),
      "sample" = rep("Sample 1", 2 * length(support_mu))
    )
  }
  return(out)
}
| 1 |
#' Helper function for visualizing differences in GAMs fit with \code{mgcv::gam}
|
|
| 2 |
#' |
|
| 3 |
#' Note that using GAMs will be less useful than fitting parameterized models as supported by |
|
| 4 |
#' \code{growthSS} and \code{fitGrowth} for common applications in plant phenotyping.
|
|
| 5 |
#' |
|
| 6 |
#' @param model A model fit with smooth terms by \code{mgcv::gam}
|
|
| 7 |
#' @param newdata A data.frame of new data to use to make predictions. If this is left NULL |
|
| 8 |
#' (the default) then |
|
| 9 |
#' an attempt is made to make newdata using the first smooth term in the formula. |
|
| 10 |
#' See examples for guidance on making appropriate newdata |
|
| 11 |
#' @param g1 A character string for the level of byVar to use as the first group to compare, |
|
| 12 |
#' if plot=TRUE then this will be shown in blue. |
|
| 13 |
#' @param g2 The second group to compare (comparison will be g1 - g2). If plot=TRUE then this will be |
|
| 14 |
#' shown in red. |
|
| 15 |
#' @param byVar Categorical variable name used to separate splines as a string. |
|
| 16 |
#' @param smoothVar The variable that splines were used on. This will often be a time variable. |
|
| 17 |
#' @param cis Confidence interval levels, can be multiple. For example, 0.95 would return Q_0.025 and |
|
| 18 |
#' Q_0.975 columns, and c(0.9, 0.95) would return Q_0.025, Q_0.05, Q_0.95, and Q_0.975 columns. |
|
| 19 |
#' Defaults to \code{seq(0.05, 0.95, 0.05)}
|
|
| 20 |
#' @param unconditional Logical, should unconditional variance-covariance be used in calculating |
|
| 21 |
#' standard errors. Defaults to TRUE. |
|
| 22 |
#' @param plot Logical, should a plot of the difference be returned? Defaults to TRUE. |
|
| 23 |
#' |
|
| 24 |
#' @keywords gam |
|
| 25 |
#' @importFrom mgcv gam s |
|
| 26 |
#' @importFrom stats vcov predict df.residual qt |
|
| 27 |
#' @importFrom viridis viridis |
|
| 28 |
#' @import ggplot2 |
|
| 29 |
#' @import patchwork |
|
| 30 |
#' @return A dataframe or a list containing a ggplot and a dataframe |
|
| 31 |
#' @examples |
|
| 32 |
#' |
|
| 33 |
#' ex <- pcvr::growthSim("logistic",
|
|
| 34 |
#' n = 20, t = 25, |
|
| 35 |
#' params = list( |
|
| 36 |
#' "A" = c(200, 160), |
|
| 37 |
#' "B" = c(13, 11), |
|
| 38 |
#' "C" = c(3, 3.5) |
|
| 39 |
#' ) |
|
| 40 |
#' ) |
|
| 41 |
#' |
|
| 42 |
#' m <- mgcv::gam(y ~ group + s(time, by = factor(group)), data = ex) |
|
| 43 |
#' |
|
| 44 |
#' support <- expand.grid( |
|
| 45 |
#' time = seq(min(ex$time), max(ex$time), length = 400), |
|
| 46 |
#' group = factor(unique(ex$group)) |
|
| 47 |
#' ) |
|
| 48 |
#' |
|
| 49 |
#' out <- gam_diff( |
|
| 50 |
#' model = m, newdata = support, g1 = "a", g2 = "b", |
|
| 51 |
#' byVar = "group", smoothVar = "time", plot = TRUE |
|
| 52 |
#' ) |
|
| 53 |
#' dim(out$data) |
|
| 54 |
#' out$plot |
|
| 55 |
#' out2 <- gam_diff( |
|
| 56 |
#' model = m, g1 = "a", g2 = "b", byVar = NULL, smoothVar = NULL, plot = TRUE |
|
| 57 |
#' ) |
|
| 58 |
#' @export |
|
| 59 | ||
| 60 |
gam_diff <- function(model, newdata = NULL, g1, g2, byVar = NULL, smoothVar = NULL,
                     cis = seq(0.05, 0.95, 0.05), unconditional = TRUE, plot = TRUE) {
  #* `Parse the model formula for the smooth term and its by-variable`
  form <- model$formula
  rhs <- as.character(form)[3]
  rg <- regexpr("s\\([a-zA-Z0-9.]+", rhs)
  xTerm <- sub("s\\(", "", regmatches(rhs, rg)[1])
  rg2 <- regexpr("by\\s?=\\s?.*[a-zA-Z0-9.]+", rhs)
  byTerm <- sub(".*\\(", "", sub("by\\s?=\\s?", "", regmatches(rhs, rg2)[1]))
  mdf <- model$model
  #* `Build default newdata across the observed smooth range if not supplied`
  if (is.null(newdata)) {
    newdata <- expand.grid(
      x = seq(min(mdf[[xTerm]]), max(mdf[[xTerm]]),
        length.out = round(diff(range(mdf[[xTerm]])) * 500)
      ),
      g = factor(unique(mdf[[byTerm]]))
    )
    colnames(newdata) <- c(xTerm, byTerm)
  }
  if (is.null(byVar)) {
    byVar <- byTerm
  }
  if (is.null(smoothVar)) {
    smoothVar <- xTerm
  }
  #* `Linear predictor matrix and row/column masks for the two groups`
  xp <- stats::predict(model, newdata = newdata, type = "lpmatrix")
  c1 <- grepl(g1, colnames(xp))
  c2 <- grepl(g2, colnames(xp))
  r1 <- newdata[[byVar]] == g1
  r2 <- newdata[[byVar]] == g2
  ## difference rows of xp for data from comparison
  X <- xp[r1, ] - xp[r2, ]
  ## zero out cols of X related to splines for other groups
  X[, !(c1 | c2)] <- 0
  ## zero out the parametric cols
  X[, !grepl("^s\\(", colnames(xp))] <- 0
  dif <- X %*% stats::coef(model)
  se <- sqrt(rowSums((X %*% stats::vcov(model, unconditional = unconditional)) * X))
  df.resid <- stats::df.residual(model)

  # Local helper: two-sided CI columns for each level in `cis`, named
  # "<prefix><upper_q>" / "<prefix><lower_q>" and sorted by name. Replaces
  # three previously-duplicated copies of this logic.
  ci_frame <- function(est, se_vec, prefix) {
    cols <- do.call(cbind, lapply(cis, function(ci) {
      crit <- stats::qt(1 - ((1 - ci) / 2), df.resid, lower.tail = TRUE)
      stats::setNames(
        data.frame(est + (crit * se_vec), est - (crit * se_vec)),
        paste0(prefix, round(c(1 - ((1 - ci) / 2), (1 - ci) / 2), 3))
      )
    }))
    cols[, order(colnames(cols))]
  }
  # Local helper: fitted band for one group, keeping only that group's
  # spline columns plus the intercept.
  group_band <- function(rows, g, prefix) {
    xg <- xp[rows, ]
    keep <- grepl(g, colnames(xp)) | grepl("\\(Intercept\\)", colnames(xp))
    xg[, !keep] <- 0
    ## zero out the parametric cols
    xg[, !grepl("\\(Intercept\\)|^s\\(", colnames(xp))] <- 0
    vals <- xg %*% stats::coef(model)
    se_g <- sqrt(rowSums((xg %*% stats::vcov(model, unconditional = unconditional)) * xg))
    ci_frame(vals, se_g, prefix)
  }

  out_df <- cbind(data.frame(g1 = g1, g2 = g2, mu = dif, se = se), ci_frame(dif, se, "Q_diff_"))
  if (plot) {
    #* `per-group model CIs, only needed when plotting`
    out_df <- cbind(out_df, group_band(r1, g1, "Q_g1_"), group_band(r2, g2, "Q_g2_"))
  }

  out_df$df.resid <- df.resid

  smoothVarRange <- range(newdata[[smoothVar]], na.rm = TRUE)
  out_df[[smoothVar]] <- seq(min(smoothVarRange), max(smoothVarRange), length.out = length(dif))
  out <- out_df
  if (plot) {
    p_diff <- .plot_gam_diff(out_df, name_pattern = "Q_diff_") +
      ggplot2::theme(
        axis.title.x.bottom = ggplot2::element_blank(),
        axis.text.x.bottom = ggplot2::element_blank()
      )
    p_model <- .plot_gam_diff(out_df,
      name_pattern = "Q_g1_",
      name_pattern2 = "Q_g2_"
    )
    layout_obj <- patchwork::plot_layout(
      design = c(
        patchwork::area(1, 1, 4, 6),
        patchwork::area(5, 1, 6, 6)
      )
    )
    patchPlot <- p_model / p_diff + layout_obj
    out <- list("data" = out_df, "plot" = patchPlot)
  }
  return(out)
}
|
| 183 | ||
| 184 |
#' *********************************************************************************************** |
|
| 185 |
#' *************** `Plot difference in smooths` **************************************** |
|
| 186 |
#' *********************************************************************************************** |
|
| 187 |
#' @description |
|
| 188 |
#' Internal function for plotting spline_diff output |
|
| 189 |
#' |
|
| 190 |
#' @keywords internal |
|
| 191 |
#' @noRd |
|
| 192 | ||
| 193 |
.plot_gam_diff <- function(df, name_pattern = "Q_diff_", name_pattern2 = NULL) {
  # The last column of df is the smooth (x-axis) variable appended by gam_diff.
  x_col <- colnames(df)[ncol(df)]
  # Recover the interval widths from the quantile column names: pair the
  # sorted quantile levels outer-to-inner and take their differences, then
  # order smallest interval first.
  q_levels <- as.numeric(
    sub(name_pattern, "", grep(paste0("^", name_pattern), colnames(df), value = TRUE))
  )
  q_sorted <- sort(q_levels)
  n_pairs <- floor(length(q_sorted) / 2)
  outer_widths <- q_sorted[length(q_sorted) + 1 - seq_len(n_pairs)] - q_sorted[seq_len(n_pairs)]
  cis <- rev(outer_widths)
  virPal <- viridis::viridis(n = length(cis), option = "mako", direction = -1, begin = 0.1)

  # Difference plots get a mean line plus a zero reference; paired model
  # plots just relabel the y axis.
  if (is.null(name_pattern2)) {
    lineLayer <- list(
      ggplot2::geom_line(ggplot2::aes(y = .data[["mu"]])),
      ggplot2::geom_hline(yintercept = 0, linetype = 5),
      ggplot2::labs(y = paste0(df[1, "g1"], " - ", df[1, "g2"]))
    )
  } else {
    lineLayer <- ggplot2::labs(y = "Model Prediction")
  }

  # One ribbon per CI width, widest drawn first so narrower bands sit on top.
  # The quantile-name expressions mirror those used when the columns were
  # created, so the string lookups match exactly.
  ribbon_layers <- function(pattern, pal) {
    lapply(rev(seq_along(cis)), function(i) {
      ci <- cis[i]
      ggplot2::geom_ribbon(
        ggplot2::aes(
          ymin = .data[[paste0(pattern, 1 - (1 - ((1 - ci) / 2)))]],
          ymax = .data[[paste0(pattern, 1 - ((1 - ci) / 2))]]
        ),
        fill = pal[i], alpha = 0.5
      )
    })
  }

  splinePlot <- ggplot2::ggplot(df, ggplot2::aes(x = .data[[x_col]])) +
    ribbon_layers(name_pattern, virPal) +
    lineLayer +
    pcv_theme()

  if (!is.null(name_pattern2)) {
    virPal2 <- viridis::viridis(n = length(cis), option = "inferno", direction = -1, begin = 0.1)
    splinePlot <- splinePlot + ribbon_layers(name_pattern2, virPal2)
  }

  return(splinePlot)
}
| 1 |
#' Function to visualize brms models similar to those made using growthSS outputs. |
|
| 2 |
#' |
|
| 3 |
#' Models fit using \link{growthSS} inputs by \link{fitGrowth} (and similar models made through other
|
|
| 4 |
#' means) can be visualized easily using this function. This will generally be called by |
|
| 5 |
#' \code{growthPlot}.
|
|
| 6 |
#' |
|
| 7 |
#' @param fit A brmsfit object, similar to those fit with \code{\link{growthSS}} outputs.
|
|
| 8 |
#' @param form A formula similar to that in \code{growthSS} inputs specifying the outcome,
|
|
| 9 |
#' predictor, and grouping structure of the data as \code{outcome ~ predictor|individual/group}.
|
|
| 10 |
#' @param df An optional dataframe to use in plotting observed growth curves on top of the model. |
|
| 11 |
#' @param groups An optional set of groups to keep in the plot. |
|
| 12 |
#' Defaults to NULL in which case all groups in the model are plotted. |
|
| 13 |
#' @param timeRange An optional range of times to use. This can be used to view predictions for |
|
| 14 |
#' future data if the available data has not reached some point (such as asymptotic size), |
|
| 15 |
#' although prediction using splines outside of the observed range is not necessarily reliable. |
|
| 16 |
#' @param facetGroups logical, should groups be separated in facets? Defaults to TRUE. |
|
| 17 |
#' @param hierarchy_value If a hierarchical model is being plotted, what value should the |
|
| 18 |
#' hierarchical predictor be? If left NULL (the default) the mean value is used. |
|
| 19 |
#' @param vir_option Viridis color scale to use for plotting credible intervals. Defaults to "plasma". |
|
| 20 |
#' @keywords growth-curve brms |
|
| 21 |
#' @import ggplot2 |
|
| 22 |
#' @import viridis |
|
| 23 |
#' @importFrom stats as.formula |
|
| 24 |
#' @examples |
|
| 25 |
#' \donttest{
|
|
| 26 |
#' simdf <- growthSim( |
|
| 27 |
#' "logistic", |
|
| 28 |
#' n = 20, t = 25, |
|
| 29 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 30 |
#' ) |
|
| 31 |
#' ss <- growthSS( |
|
| 32 |
#' model = "logistic", form = y ~ time | id / group, sigma = "spline", |
|
| 33 |
#' list("A" = 130, "B" = 10, "C" = 3),
|
|
| 34 |
#' df = simdf, type = "brms" |
|
| 35 |
#' ) |
|
| 36 |
#' fit <- fitGrowth(ss, backend = "cmdstanr", iter = 500, chains = 1, cores = 1) |
|
| 37 |
#' growthPlot(fit = fit, form = y ~ time | group, groups = "a", df = ss$df) |
|
| 38 |
#' } |
|
| 39 |
#' |
|
| 40 |
#' @return Returns a ggplot showing a brms model's credible |
|
| 41 |
#' intervals and optionally the individual growth lines. |
|
| 42 |
#' |
|
| 43 |
#' @export |
|
| 44 | ||
| 45 |
brmPlot <- function(fit, form, df = NULL, groups = NULL, timeRange = NULL, facetGroups = TRUE,
                    hierarchy_value = NULL, vir_option = "plasma") {
  # Dispatch: a non-numeric x with no individual/group structure in the
  # formula gets the static plot; everything else is treated as longitudinal.
  fitData <- fit$data
  parsed_form <- .parsePcvrForm(form, df)
  is_static <- !is.numeric(fitData[, parsed_form$x]) &&
    !parsed_form$USEG &&
    !parsed_form$USEID
  if (is_static) {
    return(.brmStaticPlot(fit, form, df, groups, facetGroups, vir_option, fitData, parsed_form))
  }
  .brmLongitudinalPlot(
    fit, form, df, groups, timeRange, facetGroups,
    hierarchy_value, vir_option, fitData, parsed_form
  )
}
|
| 59 | ||
| 60 |
#' @keywords internal |
|
| 61 |
#' @noRd |
|
| 62 |
.brmStaticPlot <- function(fit, form, df = NULL, groups = NULL, facetGroups = TRUE,
                           vir_option = "plasma", fitData, parsed_form) {
  # Unpack the parsed formula components.
  y <- parsed_form$y
  x <- parsed_form$x
  individual <- parsed_form$individual
  if (individual == "dummyIndividual") {
    individual <- NULL
  }
  group <- parsed_form$group
  df <- parsed_form$data
  # Predict at each observed level of x for quantiles 0.01, 0.03, ..., 0.99.
  probs <- seq(from = 99, to = 1, by = -2) / 100
  newData <- data.frame(x = unique(fitData[[x]]))
  colnames(newData) <- x
  predictions <- cbind(newData, predict(fit, newData, probs = probs))
  # Optionally restrict to requested groups.
  if (!is.null(groups)) {
    predictions <- predictions[predictions$group %in% groups, ]
    if (!is.null(df)) {
      df <- df[df[[group]] %in% groups, ]
    }
  }
  #* `facetGroups`
  facetLayer <- NULL
  if (facetGroups && length(unique(fitData[[group]])) > 1) {
    facetLayer <- ggplot2::facet_wrap(as.formula(paste0("~", group)))
  }
  #* `lengthen predictions`
  # Linearly map the quantile index i in {1, 3, ..., 49} onto an interval
  # width q in [0.01, 0.99] so each Qi/Q(100-i) pair becomes one band row.
  max_prime <- 0.99
  min_prime <- 0.01
  max_obs <- 49
  min_obs <- 1
  slope <- (max_prime - min_prime) / (max_obs - min_obs)
  longPreds <- do.call(rbind, lapply(seq_len(nrow(predictions)), function(r) {
    pred_row <- predictions[r, ]
    do.call(rbind, lapply(seq(1, 49, 2), function(i) {
      band <- pred_row[, c(x, "Estimate")]
      band$q <- round(1 - (slope * (i - max_obs) + max_prime), 2)
      band$min <- pred_row[[paste0("Q", i)]]
      band$max <- pred_row[[paste0("Q", 100 - i)]]
      band
    }))
  }))
  #* `Make Numeric Groups`
  longPreds$numericGroup <- as.numeric(as.factor(longPreds[[x]]))
  #* `Make plot`
  p <- ggplot2::ggplot(longPreds, ggplot2::aes(x = .data[[x]], y = .data$Estimate)) +
    facetLayer +
    ggplot2::labs(x = x, y = y) +
    pcv_theme()
  # One geom_rect per interval width; narrower intervals draw narrower rects.
  rectLayers <- lapply(unique(longPreds$q), function(q) {
    ggplot2::geom_rect(
      data = longPreds[longPreds$q == q, ],
      ggplot2::aes(
        xmin = .data[["numericGroup"]] - c(0.45 * (1 - .data[["q"]])),
        xmax = .data[["numericGroup"]] + c(0.45 * (1 - .data[["q"]])),
        ymin = .data[["min"]],
        ymax = .data[["max"]],
        group = .data[[x]],
        fill = .data[["q"]]
      ), alpha = 0.5
    )
  })
  p <- p +
    rectLayers +
    viridis::scale_fill_viridis(direction = -1, option = vir_option) +
    ggplot2::labs(fill = "Credible\nInterval")
  return(p)
}
|
| 132 |
#' Longitudinal credible interval plot from a brms growth model fit
#'
#' Predicts one new (unobserved) individual per group across a time range and
#' draws stacked ribbons per credible-interval width, optionally overlaying
#' the observed per-individual trajectories.
#' @keywords internal
#' @noRd
.brmLongitudinalPlot <- function(fit, form, df = NULL, groups = NULL,
                                 timeRange = NULL, facetGroups = TRUE,
                                 hierarchy_value = NULL, vir_option = "plasma",
                                 fitData, parsed_form) {
  # unpack the parsed pcvr formula components
  y <- parsed_form$y
  x <- parsed_form$x
  individual <- parsed_form$individual
  hierarchical_predictor <- parsed_form$hierarchical_predictor
  if (individual == "dummyIndividual") {
    # "dummyIndividual" is a placeholder meaning no individual-level term
    individual <- NULL
  }
  group <- parsed_form$group
  df <- parsed_form$data # NOTE: overrides the df argument with the parsed data
  # prediction quantiles 0.99, 0.97, ..., 0.01
  probs <- seq(from = 99, to = 1, by = -2) / 100
  if (is.null(timeRange)) {
    timeRange <- unique(fitData[[x]])
  }
  # one new individual per group across the requested time range
  newData <- data.frame(
    x = rep(timeRange, times = length(unique(fitData[[group]]))),
    group = rep(unique(fitData[[group]]), each = length(timeRange)),
    individual = rep(paste0("new_", seq_along(unique(fitData[[group]]))), each = length(timeRange))
  )
  colnames(newData) <- c(x, group, individual)
  if (!is.null(hierarchical_predictor)) {
    # hold the hierarchical predictor at its mean unless a value was given
    if (is.null(hierarchy_value)) {
      hierarchy_value <- mean(fitData[[hierarchical_predictor]])
    }
    newData[[hierarchical_predictor]] <- hierarchy_value
  }
  predictions <- cbind(newData, predict(fit, newData, probs = probs))
  if (!is.null(groups)) {
    # bugfix: the grouping column is named after the *value* of `group`
    # (set via colnames(newData) above), so index with [[group]] instead of
    # the literal `$group`, which was NULL unless the grouping variable
    # happened to be called "group" and silently emptied the predictions.
    predictions <- predictions[predictions[[group]] %in% groups, ]
    if (!is.null(df)) {
      df <- df[df[[group]] %in% groups, ]
    }
  }
  #* `facetGroups`
  facetLayer <- NULL
  if (facetGroups && length(unique(fitData[[group]])) > 1) {
    facetLayer <- ggplot2::facet_wrap(as.formula(paste0("~", group)))
  }
  #* `lengthen predictions`
  # linear map from quantile index i in {1, 3, ..., 49} to interval width q
  max_prime <- 0.99
  min_prime <- 0.01
  max_obs <- 49
  min_obs <- 1
  c1 <- (max_prime - min_prime) / (max_obs - min_obs)
  # one row per (prediction row, interval width) pair with min/max bounds
  longPreds <- do.call(rbind, lapply(seq_len(nrow(predictions)), function(r) {
    sub <- predictions[r, ]
    do.call(rbind, lapply(seq(1, 49, 2), function(i) {
      min <- paste0("Q", i)
      max <- paste0("Q", 100 - i)
      iter <- sub[, c(x, group, individual, "Estimate")]
      iter$q <- round(1 - (c1 * (i - max_obs) + max_prime), 2)
      iter$min <- sub[[min]]
      iter$max <- sub[[max]]
      iter
    }))
  }))
  #* `Make plot`
  p <- ggplot2::ggplot(longPreds, ggplot2::aes(x = .data[[x]], y = .data$Estimate)) +
    facetLayer +
    ggplot2::labs(x = x, y = y) +
    pcv_theme()
  # one semi-transparent ribbon per credible-interval width
  p <- p +
    lapply(unique(longPreds$q), function(q) {
      ggplot2::geom_ribbon(
        data = longPreds[longPreds$q == q, ],
        ggplot2::aes(
          ymin = .data[["min"]],
          ymax = .data[["max"]],
          group = .data[[group]],
          fill = .data[["q"]]
        ), alpha = 0.5
      )
    }) +
    viridis::scale_fill_viridis(direction = -1, option = vir_option) +
    ggplot2::labs(fill = "Credible\nInterval")
  # overlay observed per-individual trajectories when available
  if (!is.null(df) && !is.null(individual)) {
    p <- p + ggplot2::geom_line(
      data = df, ggplot2::aes(.data[[x]], .data[[y]],
        group = interaction(.data[[individual]], .data[[group]])
      ),
      color = "gray20", linewidth = 0.2
    )
  }
  return(p)
}
| 1 |
#' Helper to make pcvr default priors from several kinds of input
#'
#' Accepts NULL (brms defaults), a brmsprior object (returned as-is), a
#' numeric vector, or a list, and assembles a brmsprior for the model.
#' @keywords internal
#' @noRd
.makePriors <- function(priors, pars, df, group, USEGROUP, sigma, family, formula) {
  # no priors supplied: fall back to brms' defaults for this model
  if (is.null(priors)) {
    prior <- .explicitDefaultPrior(formula, df, family)
    return(prior)
  }
  #* `if priors is a brmsprior`
  # already a brmsprior: use it unchanged
  if (any(methods::is(priors, "brmsprior"))) {
    return(priors)
  }
  #* `if priors is a numeric vector`
  # numeric vector -> named list matching pars (errors on length mismatch)
  priors <- .fixNumericPriors(priors, pars)
  #* `if priors is a list`
  # normalize list priors per group; groupedPriors flags per-group priors
  formatListPriorsRes <- .formatListPriors(priors, pars, df, group, USEGROUP)
  priors <- formatListPriorsRes[["priors"]]
  groupedPriors <- formatListPriorsRes[["groupedPriors"]]
  #* `Arrange priors to match pars explicitly`
  if (length(setdiff(pars, names(priors))) > 0) {
    # some pars have no prior: keep specified ones first, unspecified last
    specified_pars <- intersect(names(priors), pars)
    unspecified_pars <- setdiff(pars, names(priors))
    priors <- priors[specified_pars]
    pars <- c(specified_pars, unspecified_pars)
  } else {
    priors <- priors[pars]
  }
  #* `Make stan strings`
  priorStanStrings <- .stanStringHelper(priors, pars, USEGROUP)
  #* `get default priors for intercept only distributional parameters`
  prior <- .initializePriorObject(sigma, family)
  #* `add priors for estimated parameters`
  for (nm in names(priorStanStrings)) {
    dist <- priorStanStrings[[nm]]
    # names are "par" or "par_group"; the part before "_" is the nlpar
    pr <- strsplit(nm, "_")[[1]][1]
    if (USEGROUP && groupedPriors) { # if there are groups and they have different priors
      gr <- paste0(group, strsplit(nm, "_")[[1]][2])
      prior <- prior + brms::set_prior(dist, coef = gr, nlpar = pr)
      # currently cannot set lb for prior with coef
      # there is a clunky workaround but it wouldn't work with expected data types
      # https://github.com/paul-buerkner/brms/issues/86
    } else {
      # growth parameters get a lower bound of 0; changepoints/intercepts don't
      lb <- ifelse(grepl("changePoint|I$", pr), NA, 0)
      prior <- prior + brms::set_prior(dist, nlpar = pr, lb = lb)
    }
  }
  prior <- prior[-1, ] # remove flat prior on b
  prior <- unique(prior)
  # could add intercept term prior here
  return(prior)
}
|
| 53 | ||
| 54 | ||
| 55 |
#' Helper function to fix numeric priors
#'
#' Converts a bare numeric vector of prior means into a named list matching
#' `pars` (warning about the assumed order), erroring on a length mismatch.
#' Non-numeric input is returned unchanged.
#' @keywords internal
#' @noRd
.fixNumericPriors <- function(priors, pars) {
  #* `if priors is a numeric vector`
  if (!is.numeric(priors)) {
    return(priors)
  }
  if (length(priors) != length(pars)) {
    stop(paste0(
      "`priors` is length ", length(priors), " while the specified model requires ",
      length(pars), " parameters."
    ))
  }
  warning("Assuming that prior is in order: ", paste0(pars, collapse = ", "))
  priors <- stats::setNames(as.list(priors), pars)
  return(priors)
}
|
| 76 | ||
| 77 |
#' Helper function to format list priors
#'
#' Normalizes a list of priors so every parameter has one value per group
#' (named by group level when USEGROUP), and flags whether priors differ
#' between groups.
#' NOTE(review): `groupedPriors` is only assigned inside the `is.list`
#' branch; a non-list `priors` reaching this function would error at the
#' return — confirm callers always pass a list at this point.
#' @keywords internal
#' @noRd
.formatListPriors <- function(priors, pars, df, group, USEGROUP) {
  if (is.list(priors)) {
    if (is.null(names(priors))) {
      # unnamed list: assume positional correspondence with pars
      warning("Assuming that each element in priors is in order: ", paste0(pars, collapse = ", "))
      names(priors) <- pars
    }
    # fixed changepoints are not estimated, so they take no prior
    priors <- priors[!grepl("fixedChangePoint", names(priors))]
    if (!all(pars %in% names(priors))) {
      warning(paste0(
        "Parameter names and prior names do not match. Priors include ",
        paste(setdiff(names(priors), pars), collapse = ", "),
        "... and parameters include ",
        paste(setdiff(pars, names(priors)), collapse = ", "),
        "... Please rename the misspecified priors."
      ))
    }
    groupedPriors <- any(unlist(lapply(priors, length)) > 1)
    # if any prior has multiple means then groupedPriors is TRUE
    if (groupedPriors) { # if more than one value is specified per parameter
      # recycle every prior to the longest prior's length
      ml <- max(unlist(lapply(priors, length)))
      priors <- lapply(priors, function(p) rep(p, length.out = ml))
      if (any(unlist(lapply(priors, function(p) !is.null(names(p)))))) {
        # if any inner values are named then apply that to all priors
        wch <- which(unlist(lapply(priors, function(p) !is.null(names(p)))))
        nms <- names(priors[[wch]])
        for (i in seq_along(priors)) {
          names(priors[[i]]) <- nms
        }
      }
      if (any(unlist(lapply(priors, function(p) is.null(names(p)))))) {
        # if no inner values were named
        # fall back to naming by the group levels, in order of appearance
        for (i in seq_along(priors)) {
          names(priors[[i]]) <- unique(df[[group]])
        }
      }
    } else { # else is for prior of length 1 for each element,
      # in which case they need to replicated per groups
      # this should also handle non-grouped formulae
      l <- length(unique(df[[group]]))
      priors <- lapply(priors, rep, length.out = l)
      nms <- unique(df[[group]])
      if (USEGROUP) {
        for (i in seq_along(priors)) {
          names(priors[[i]]) <- nms
        }
      }
    }
  }
  return(list("priors" = priors, "groupedPriors" = groupedPriors))
}
|
| 133 | ||
| 134 | ||
| 135 |
#' Helper function to write stan priors
#'
#' Builds Stan distribution strings for each parameter (and each group when
#' grouped): lognormal for growth parameters, student-t for changepoints
#' and intercepts. Returns a named character vector; names are "par" or
#' "par_group".
#' @keywords internal
#' @noRd
.stanStringHelper <- function(priors, pars, USEGROUP) {
  strs <- unlist(lapply(pars, function(p) {
    vals <- priors[[p]]
    if (grepl("changePoint|I$", p)) {
      # changepoints/intercepts are T_5(mu, 3)
      paste0("student_t(5,", vals, ", 3)")
    } else {
      # growth parameters are LN
      paste0("lognormal(log(", vals, "), 0.25)")
    }
  }))
  par_names <- rep(names(priors), each = length(priors[[1]]))
  if (USEGROUP) {
    grp_names <- rep(names(priors[[1]]), length.out = length(strs))
    names(strs) <- paste(par_names, grp_names, sep = "_")
  } else {
    names(strs) <- par_names
  }
  return(strs)
}
|
| 158 | ||
| 159 | ||
| 160 |
#' Helper function to initialize the prior object
#'
#' Uses brms::get_prior on a dummy y ~ x model (plus formulas for
#' intercept-only and smoothed distributional parameters) to seed a
#' brmsprior object that estimated-parameter priors are then added to.
#' @keywords internal
#' @noRd
.initializePriorObject <- function(sigma, family) {
  # distributional parameters held at an intercept ("not_estimated")
  int_only_dpars <- names(sigma[which(sigma == "not_estimated")])
  if (length(int_only_dpars) >= 1) {
    int_dpars_form <- as.formula(paste0(paste(int_only_dpars, collapse = "+"), "~1"))
  } else {
    int_dpars_form <- NULL
  }
  # distributional parameters modeled with a smooth term
  smooth_dpars <- names(sigma[which(sigma %in% c("gam", "spline"))])
  if (length(smooth_dpars) >= 1) {
    smooth_dpars_form <- as.formula(paste0(paste(smooth_dpars, collapse = "+"), "~s(x)"))
  } else {
    smooth_dpars_form <- NULL
  }
  # drop NULL formulas; brms::bf wants NULL (not an empty list) when none
  flist <- list(int_dpars_form, smooth_dpars_form)
  flist <- flist[!unlist(lapply(flist, is.null))]
  if (length(flist) == 0) {
    flist <- NULL
  }
  # dummy data is enough: only the prior structure is needed, not a fit
  gp <- brms::get_prior(brms::bf(y ~ x, flist = flist),
    data = data.frame(y = 1:100, x = 1:100), family = family
  )
  # keep the flat b-class row (removed later by .makePriors), smooth-dpar
  # intercepts, and all rows for intercept-only dpars
  prior <- rbind(
    gp[1, ], gp[gp$dpar %in% smooth_dpars & gp$class == "Intercept", ],
    gp[gp$dpar %in% int_only_dpars, ]
  )
  return(prior)
}
|
| 192 | ||
| 193 |
#' Helper function to explicitly return default priors from get_prior
#'
#' Thin wrapper returning brms' default priors for the given model
#' specification unchanged.
#' @keywords internal
#' @noRd
.explicitDefaultPrior <- function(formula, df, family) {
  return(brms::get_prior(formula = formula, data = df, family = family))
}
|
| 202 | ||
| 203 |
#' Helper function to reformat sigma argument in brmSS
#'
#' Normalizes the user-supplied sigma specification into a list named by the
#' family's distributional parameters: NULL becomes "int" for every dpar, a
#' single formula is wrapped in a list, too many entries is an error, and
#' missing entries are padded with "not_estimated".
#' @keywords internal
#' @noRd
.sigmaHelper <- function(sigma, dpars, family, models) {
  if (is.null(sigma)) {
    # default: every distributional parameter is intercept-only
    sigma <- lapply(dpars, function(i) "int")
  }
  if (methods::is(sigma, "formula")) {
    sigma <- list(sigma)
  }
  n_sigma <- length(sigma)
  n_dpars <- length(dpars)
  if (n_sigma > n_dpars) {
    stop(paste0(
      "sigma contains ", n_sigma, " formulas.",
      "The specified family (", family, ") only has ", n_dpars,
      " valid additional distributional parameters (", paste0(dpars, collapse = ", "), ")."
    ))
  }
  if (n_sigma < n_dpars) {
    # pad the unspecified distributional parameters
    sigma <- append(sigma, lapply(seq_len(n_dpars - n_sigma), function(i) paste0("not_estimated")))
  }
  names(sigma) <- dpars
  # here I am foregoing pattern matching so that it is simpler to check for intercepts later.
  if (!any(grepl("\\+", sigma))) { # no distributional changepoint models
    sigma <- lapply(sigma, identity)
  }
  return(sigma)
}
|
| 238 | ||
| 239 |
#' Helper function to match growth model
#'
#' Matches a user-supplied model name against the supported models, handling
#' the "decay" prefix. Compound models (containing "+") are passed through
#' unmatched with decay = FALSE.
#' @keywords internal
#' @noRd
.matchGrowthModel <- function(model, models) {
  decay <- FALSE
  matched_model <- model
  if (!grepl("\\+", model)) {
    if (grepl("decay", model)) {
      # strip the "decay" keyword before matching the base model
      decay <- TRUE
      matched_model <- trimws(gsub("decay", "", model))
    }
    matched_model <- match.arg(matched_model, models)
  }
  return(list("model" = matched_model, "decay" = decay))
}
|
| 259 | ||
| 260 | ||
| 261 | ||
| 262 |
#' Helper function for logistic brms formulas
#'
#' Builds a logistic growth formula A/(1+exp((B-x)/C)), optionally with an
#' intercept (I) and optionally prefixed for a distributional parameter.
#' Returns the formula and the parameter names it introduces.
#' @keywords internal
#' @noRd
.brms_form_logistic <- function(x, y, group, dpar = FALSE,
                                nTimes = NULL, useGroup = TRUE, prior = NULL, int = FALSE) {
  if (dpar) {
    # distributional sub-model: parameters are prefixed with the dpar name
    if (int) {
      fstr <- paste0(y, " ~ ", y, "I + ", y, "A/(1+exp((", y, "B-", x, ")/", y, "C))")
      pars <- paste0(y, LETTERS[c(1:3, 9)])
    } else {
      fstr <- paste0(y, " ~ ", y, "A/(1+exp((", y, "B-", x, ")/", y, "C))")
      pars <- paste0(y, LETTERS[1:3])
    }
    form <- brms::nlf(stats::as.formula(fstr))
  } else {
    if (int) {
      fstr <- paste0(y, " ~ I + (A/(1+exp((B-", x, ")/C)))")
      pars <- LETTERS[c(1:3, 9)]
    } else {
      fstr <- paste0(y, " ~ A/(1+exp((B-", x, ")/C))")
      pars <- LETTERS[1:3]
    }
    form <- stats::as.formula(fstr)
  }
  return(list(form = form, pars = pars))
}
|
| 294 |
#' Helper function for brms formulas
#'
#' Builds a gompertz growth formula A*exp(-B*exp(-C*x)), optionally with an
#' intercept (I) and optionally prefixed for a distributional parameter.
#' @keywords internal
#' @noRd
.brms_form_gompertz <- function(x, y, group, dpar = FALSE,
                                nTimes = NULL, useGroup = TRUE, prior = NULL, int) {
  if (dpar) {
    # distributional sub-model: parameters are prefixed with the dpar name
    if (int) {
      fstr <- paste0(y, " ~ ", y, "I + (", y, "A*exp(-", y, "B*exp(-", y, "C*", x, ")))")
      pars <- paste0(y, LETTERS[c(1:3, 9)])
    } else {
      fstr <- paste0(y, " ~ ", y, "A*exp(-", y, "B*exp(-", y, "C*", x, "))")
      pars <- paste0(y, LETTERS[1:3])
    }
    form <- brms::nlf(stats::as.formula(fstr))
  } else {
    if (int) {
      fstr <- paste0(y, " ~ I + (A*exp(-B*exp(-C*", x, ")))")
      pars <- LETTERS[c(1:3, 9)]
    } else {
      fstr <- paste0(y, " ~ A*exp(-B*exp(-C*", x, "))")
      pars <- LETTERS[1:3]
    }
    form <- stats::as.formula(fstr)
  }
  return(list(form = form, pars = pars))
}
|
| 326 |
#' Helper function for brms formulas
#'
#' Builds a double-logistic growth formula (two logistic phases, the second
#' scaled by A2-A), optionally with an intercept (I) and optionally prefixed
#' for a distributional parameter.
#' @keywords internal
#' @noRd
.brms_form_doublelogistic <- function(x, y, group, dpar = FALSE,
                                      nTimes = NULL, useGroup = TRUE, prior = NULL, int) {
  if (dpar) {
    # distributional sub-model: parameters are prefixed with the dpar name
    if (int) {
      form <- brms::nlf(stats::as.formula(paste0(
        y, " ~ ", y, "I + (", y,
        "A/(1+exp((", y, "B-", x, ")/", y, "C)) + ((",
        y, "A2-", y, "A) /(1+exp((", y, "B2-", x,
        ")/", y, "C2))))"
      )))
      pars <- paste0(y, c("I", "A", "B", "C", "A2", "B2", "C2"))
    } else {
      form <- brms::nlf(stats::as.formula(paste0(
        y, " ~ ", y,
        "A/(1+exp((", y, "B-", x, ")/", y, "C)) + ((",
        y, "A2-", y, "A) /(1+exp((", y, "B2-", x,
        ")/", y, "C2)))"
      )))
      pars <- paste0(y, c("A", "B", "C", "A2", "B2", "C2"))
    }
  } else {
    if (int) {
      form <- stats::as.formula(paste0(
        y, " ~ I + (A/(1+exp((B-", x, ")/C)) + ((A2-A) /(1+exp((B2-", x, ")/C2))))"
      ))
      pars <- c("I", "A", "B", "C", "A2", "B2", "C2")
    } else {
      form <- stats::as.formula(paste0(
        y, " ~ A/(1+exp((B-", x, ")/C)) + ((A2-A) /(1+exp((B2-", x, ")/C2)))"
      ))
      pars <- c("A", "B", "C", "A2", "B2", "C2")
    }
  }
  return(list(form = form, pars = pars))
}
|
| 367 |
#' Helper function for brms formulas
#'
#' Builds a double-gompertz growth formula (two gompertz phases, the second
#' scaled by A2-A and shifted by B), optionally with an intercept (I) and
#' optionally prefixed for a distributional parameter.
#' @keywords internal
#' @noRd
.brms_form_doublegompertz <- function(x, y, group, dpar = FALSE,
                                      nTimes = NULL, useGroup = TRUE, prior = NULL, int) {
  if (dpar) {
    # distributional sub-model: parameters are prefixed with the dpar name
    if (int) {
      form <- brms::nlf(stats::as.formula(paste0(
        y, " ~ ", y, "I + (", y, "A * exp(-", y, "B * exp(-", y,
        "C*", x, ")) + (", y, "A2-", y, "A) * exp(-", y,
        "B2 * exp(-", y, "C2*(", x, "-", y, "B))))"
      )))
      pars <- paste0(y, c("I", "A", "B", "C", "A2", "B2", "C2"))
    } else {
      form <- brms::nlf(stats::as.formula(paste0(
        y, " ~ ", y, "A * exp(-", y, "B * exp(-", y,
        "C*", x, ")) + (", y, "A2-", y, "A) * exp(-", y,
        "B2 * exp(-", y, "C2*(", x, "-", y, "B)))"
      )))
      pars <- paste0(y, c("A", "B", "C", "A2", "B2", "C2"))
    }
  } else {
    if (int) {
      form <- stats::as.formula(paste0(
        y, " ~ I + (A * exp(-B * exp(-C*", x,
        ")) + (A2-A) * exp(-B2 * exp(-C2*(", x, "-B))))"
      ))
      pars <- c("I", "A", "B", "C", "A2", "B2", "C2")
    } else {
      form <- stats::as.formula(paste0(
        y, " ~ A * exp(-B * exp(-C*", x,
        ")) + (A2-A) * exp(-B2 * exp(-C2*(", x, "-B)))"
      ))
      pars <- c("A", "B", "C", "A2", "B2", "C2")
    }
  }
  return(list(form = form, pars = pars))
}
|
| 408 |
#' Helper function for brms formulas
#'
#' Builds a monomolecular growth formula A-A*exp(-B*x), optionally with an
#' intercept (I) and optionally prefixed for a distributional parameter.
#' @keywords internal
#' @noRd
.brms_form_monomolecular <- function(x, y, group, dpar = FALSE, nTimes = NULL,
                                     useGroup = TRUE, prior = NULL, int) {
  if (dpar) {
    # distributional sub-model: parameters are prefixed with the dpar name
    if (int) {
      fstr <- paste0(y, " ~ ", y, "I + (", y, "A-", y, "A*exp(-", y, "B*", x, "))")
      pars <- paste0(y, LETTERS[c(1:2, 9)])
    } else {
      fstr <- paste0(y, " ~ ", y, "A-", y, "A*exp(-", y, "B*", x, ")")
      pars <- paste0(y, LETTERS[1:2])
    }
    form <- brms::nlf(stats::as.formula(fstr))
  } else {
    if (int) {
      fstr <- paste0(y, "~I + (A-A*exp(-B*", x, "))")
      pars <- LETTERS[c(1:2, 9)]
    } else {
      fstr <- paste0(y, "~A-A*exp(-B*", x, ")")
      pars <- LETTERS[1:2]
    }
    form <- stats::as.formula(fstr)
  }
  return(list(form = form, pars = pars))
}
|
| 438 |
#' Helper function for brms formulas
#'
#' Builds an exponential growth formula A*exp(B*x), optionally with an
#' intercept (I) and optionally prefixed for a distributional parameter.
#' @keywords internal
#' @noRd
.brms_form_exponential <- function(x, y, group, dpar = FALSE,
                                   nTimes = NULL, useGroup = TRUE, prior = NULL, int) {
  if (dpar) {
    # distributional sub-model: parameters are prefixed with the dpar name
    if (int) {
      fstr <- paste0(y, " ~ ", y, "I + (", y, "A*exp(", y, "B*", x, "))")
      pars <- paste0(y, LETTERS[c(1:2, 9)])
    } else {
      fstr <- paste0(y, " ~ ", y, "A*exp(", y, "B*", x, ")")
      pars <- paste0(y, LETTERS[1:2])
    }
    form <- brms::nlf(stats::as.formula(fstr))
  } else {
    if (int) {
      fstr <- paste0(y, " ~ I + (A*exp(B*", x, "))")
      pars <- LETTERS[c(1:2, 9)]
    } else {
      fstr <- paste0(y, " ~ A*exp(B*", x, ")")
      pars <- LETTERS[1:2]
    }
    form <- stats::as.formula(fstr)
  }
  return(list(form = form, pars = pars))
}
|
| 468 |
#' Helper function for brms formulas
#'
#' Builds a linear growth formula. For distributional parameters the
#' parameterization depends on whether an explicit prior for `yA` was
#' supplied: with one, a non-linear A*x form is used; without, a plain
#' linear x + x:group form (no slope parameter to set a prior on).
#' Consistency fix: both formerly-bare `as.formula` calls are now qualified
#' as `stats::as.formula`, matching every other call in this file (bare
#' `as.formula` relies on the package NAMESPACE importing stats).
#' @keywords internal
#' @noRd
.brms_form_linear <- function(x, y, group, dpar = FALSE, nTimes = NULL,
                              useGroup = TRUE, prior = NULL, int) {
  if (dpar) {
    if (!is.null(prior) && any(grepl(paste0(y, "A"), names(prior)))) {
      #* use non-linear parameterization with subA
      if (int) {
        form <- brms::nlf(stats::as.formula(paste0(y, " ~ ", y, "I + (", y, "A", "*", x, ")")))
        pars <- c(paste0(y, c("I", "A")))
      } else {
        form <- brms::nlf(stats::as.formula(paste0(y, " ~ ", y, "A", "*", x)))
        pars <- c(paste0(y, "A"))
      }
    } else {
      #* linear parameterization using x directly, no intercept option
      if (int) {
        form <- brms::nlf(stats::as.formula(paste0(y, " ~ ", y, "I + (", x, "+", x, ":", group, ")")))
        pars <- paste0(y, "I")
      } else {
        form <- stats::as.formula(paste0(y, " ~ ", x, "+", x, ":", group))
        pars <- c()
      }
    }
  } else { # non-dpar option, with or without intercept
    if (int) {
      form <- stats::as.formula(paste0(y, " ~ I + A*", x))
      pars <- c("I", "A")
    } else {
      form <- stats::as.formula(paste0(y, " ~ A*", x))
      pars <- c("A")
    }
  }
  return(list(form = form, pars = pars))
}
|
| 507 | ||
| 508 |
#' Helper function for brms formulas
#'
#' Builds a logarithmic growth formula A*log(x), optionally with an
#' intercept (I) and optionally prefixed for a distributional parameter.
#' @keywords internal
#' @noRd
.brms_form_logarithmic <- function(x, y, group, dpar = FALSE, nTimes = NULL,
                                   useGroup = TRUE, prior = NULL, int) {
  if (dpar) {
    # distributional sub-model: parameters are prefixed with the dpar name
    if (int) {
      fstr <- paste0(y, " ~ ", y, "I + (", y, "A*log(", x, "))")
      pars <- paste0(y, LETTERS[c(1, 9)])
    } else {
      fstr <- paste0(y, " ~ ", y, "A*log(", x, ")")
      pars <- paste0(y, "A")
    }
    form <- brms::nlf(stats::as.formula(fstr))
  } else {
    if (int) {
      fstr <- paste0(y, " ~ I + (A*log(", x, "))")
      pars <- LETTERS[c(1, 9)]
    } else {
      fstr <- paste0(y, " ~ A*log(", x, ")")
      pars <- "A"
    }
    form <- stats::as.formula(fstr)
  }
  return(list(form = form, pars = pars))
}
|
| 535 | ||
| 536 |
#' Helper function for brms formulas
#'
#' Builds a power-law growth formula A*x^B, optionally with an intercept
#' (I) and optionally prefixed for a distributional parameter.
#' @keywords internal
#' @noRd
.brms_form_powerlaw <- function(x, y, group, dpar = FALSE, nTimes = NULL,
                                useGroup = TRUE, prior = NULL, int) {
  if (dpar) {
    # distributional sub-model: parameters are prefixed with the dpar name
    if (int) {
      fstr <- paste0(y, " ~ ", y, "I + (", y, "A*", x, "^", y, "B)")
      pars <- paste0(y, LETTERS[c(1:2, 9)])
    } else {
      fstr <- paste0(y, " ~ ", y, "A*", x, "^", y, "B")
      pars <- paste0(y, LETTERS[1:2])
    }
    form <- brms::nlf(stats::as.formula(fstr))
  } else {
    if (int) {
      fstr <- paste0(y, " ~ I + (A*", x, "^B)")
      pars <- LETTERS[c(1:2, 9)]
    } else {
      fstr <- paste0(y, " ~ A*", x, "^B")
      pars <- LETTERS[1:2]
    }
    form <- stats::as.formula(fstr)
  }
  return(list(form = form, pars = pars))
}
|
| 563 |
#' Helper function for brms formulas
#'
#' Builds a smooth (GAM) formula y ~ s(x, by = group[, k = nTimes]). The
#' basis dimension k is only restricted when fewer than 11 unique time
#' points are available. No non-linear parameters are introduced.
#' @keywords internal
#' @noRd
.brms_form_gam <- function(x, y, group, dpar = FALSE, nTimes = NULL,
                           useGroup = TRUE, prior = NULL, int) {
  if (useGroup) {
    by <- paste0(", by = ", group)
  } else {
    by <- ","
  }
  # bugfix: nTimes defaults to NULL, and `NULL < 11` is logical(0), which
  # errors inside `if` ("argument is of length zero"); only restrict the
  # basis dimension when nTimes is actually known.
  if (!is.null(nTimes) && nTimes < 11) {
    k <- paste0(", k = ", nTimes)
  } else {
    k <- NULL
  }
  form <- stats::as.formula(paste0(y, " ~ s(", x, by, k, ")"))
  pars <- NULL
  return(list(form = form, pars = pars))
}
|
| 587 |
#' Helper function for brms formulas
#'
#' Intercept-only sub-model: one mean per group (cell-means coding via
#' 0 + group) when grouping is used, otherwise a single global intercept.
#' @keywords internal
#' @noRd
.brms_form_int <- function(x, y, group, dpar = FALSE, nTimes = NULL,
                           useGroup = TRUE, prior = NULL, int) {
  rhs <- if (useGroup) paste0("0 + ", group) else paste0("1")
  return(list(
    form = stats::as.formula(paste0(y, " ~ ", rhs)),
    pars = c()
  ))
}
|
| 604 | ||
| 605 |
#' Helper function for brms formulas
#'
#' Non-estimated distributional parameter: modeled as a bare intercept with
#' no additional non-linear parameters.
#' @keywords internal
#' @noRd
.brms_form_not_estimated <- function(x, y, group, dpar = FALSE, nTimes = NULL,
                                     useGroup = TRUE, prior = NULL, int) {
  return(list(
    form = stats::as.formula(paste0(y, " ~ 1")),
    pars = c()
  ))
}
|
| 617 | ||
| 618 |
#' Helper function for brms formulas
#'
#' Converts a growth formula into a decay formula by negating its right
#' hand side (leaving the intercept I positive when present).
#' NOTE(review): assumes as.character(formList$form) yields c("~", lhs, rhs),
#' i.e. a plain two-sided formula — confirm callers never pass the
#' brms::nlf-wrapped form here.
#' @keywords internal
#' @noRd
.brms_form_decay <- function(formList, int = FALSE) {
  modelForm <- formList$form
  # chars[1] is "~", chars[2] the LHS, chars[3] the RHS
  chars <- as.character(modelForm)
  if (!int) {
    # negate the whole RHS: y ~ -(rhs)
    formList$form <- as.formula(paste0(chars[2], chars[1], "-(", chars[3], ")"))
  } else {
    # keep the intercept additive: strip "I +" then negate the remainder
    rhs <- chars[3]
    rhs <- trimws(gsub("I\\s?\\+", "", rhs))
    formList$form <- as.formula(paste0(chars[2], chars[1], "I - (", rhs, ")"))
  }
  formList
}
|
| 635 | ||
| 636 |
#' Helper function for brms formulas
#'
#' Builds a frechet-shaped growth formula A * exp(-((x-0)/C)^(-B)),
#' optionally with an intercept (I) and optionally prefixed for a
#' distributional parameter.
#' @keywords internal
#' @noRd
.brms_form_frechet <- function(x, y, group, dpar = FALSE, nTimes = NULL,
                               useGroup = TRUE, prior = NULL, int) {
  if (dpar) {
    # distributional sub-model: parameters are prefixed with the dpar name
    if (int) {
      fstr <- paste0(y, " ~ ", y, "I + (", y, "A * exp(-((", x, "-0)/", y, "C)^(-", y, "B)))")
      pars <- paste0(y, LETTERS[c(1:3, 9)])
    } else {
      fstr <- paste0(y, " ~ ", y, "A * exp(-((", x, "-0)/", y, "C)^(-", y, "B))")
      pars <- paste0(y, LETTERS[1:3])
    }
    form <- brms::nlf(stats::as.formula(fstr))
  } else {
    if (int) {
      fstr <- paste0(y, " ~ I + (A * exp(-((", x, "-0)/C)^(-B)))")
      pars <- LETTERS[c(1:3, 9)]
    } else {
      fstr <- paste0(y, " ~ A * exp(-((", x, "-0)/C)^(-B))")
      pars <- LETTERS[1:3]
    }
    form <- stats::as.formula(fstr)
  }
  return(list(form = form, pars = pars))
}
|
| 666 | ||
| 667 |
#' Helper function for brms formulas
#'
#' Builds a weibull-shaped growth formula A * (1-exp(-(x/C)^B)), optionally
#' with an intercept (I) and optionally prefixed for a distributional
#' parameter.
#' Bugfix: the dpar branches previously built the exponent as `^y` (the raw
#' dpar name, e.g. "sigma") instead of the `yB` parameter, even though
#' `pars` includes yB; the missing "B" suffix is restored to match the
#' non-dpar form `^B`.
#' @keywords internal
#' @noRd
.brms_form_weibull <- function(x, y, group, dpar = FALSE, nTimes = NULL,
                               useGroup = TRUE, prior = NULL, int) {
  if (dpar) {
    # distributional sub-model: parameters are prefixed with the dpar name
    if (int) {
      form <- brms::nlf(stats::as.formula(paste0(
        y, " ~ ", y, "I + (", y, "A * (1-exp(-(", x, "/", y, "C)^", y, "B)))"
      )))
      pars <- paste0(y, LETTERS[c(1:3, 9)])
    } else {
      form <- brms::nlf(stats::as.formula(paste0(
        y, " ~ ", y, "A * (1-exp(-(", x, "/", y, "C)^", y, "B))"
      )))
      pars <- paste0(y, LETTERS[1:3])
    }
  } else {
    if (int) {
      form <- stats::as.formula(paste0(y, " ~ I + (A * (1-exp(-(", x, "/C)^B)))"))
      pars <- LETTERS[c(1:3, 9)]
    } else {
      form <- stats::as.formula(paste0(y, " ~ A * (1-exp(-(", x, "/C)^B))"))
      pars <- LETTERS[1:3]
    }
  }
  return(list(form = form, pars = pars))
}
|
| 697 | ||
| 698 |
#' Helper function for brms formulas |
|
| 699 |
#' |
|
| 700 |
#' @keywords internal |
|
| 701 |
#' @noRd |
|
| 702 | ||
| 703 |
.brms_form_gumbel <- function(x, y, group, dpar = FALSE, nTimes = NULL,
                              useGroup = TRUE, prior = NULL, int) {
  # Gumbel growth formula: A * exp(-exp(-(x - B)/C)), optionally offset by an
  # intercept "I". When dpar = TRUE this builds a sub-model for a
  # distributional parameter: coefficients are prefixed with the parameter
  # name in `y` and the formula is wrapped in brms::nlf().
  prefix <- if (dpar) y else ""
  rhs <- paste0(
    prefix, "A * exp(-exp(-(", x, "-", prefix, "B)/", prefix, "C))"
  )
  if (int) {
    rhs <- paste0(prefix, "I + (", rhs, ")")
  }
  pars <- paste0(prefix, LETTERS[if (int) c(1:3, 9) else 1:3])
  fml <- stats::as.formula(paste0(y, " ~ ", rhs))
  if (dpar) {
    fml <- brms::nlf(fml)
  }
  # Return the model formula and the names of its non-linear parameters.
  return(list(form = fml, pars = pars))
}
|
| 728 | ||
| 729 |
#' Helper function for brms formulas |
|
| 730 |
#' |
|
| 731 |
#' @keywords internal |
|
| 732 |
#' @noRd |
|
| 733 | ||
| 734 |
.brms_form_bragg <- function(x, y, group, dpar = FALSE, nTimes = NULL,
                             useGroup = TRUE, prior = NULL, int) {
  # Bragg dose-response formula: A * exp(-B * (x - C)^2), optionally offset
  # by an intercept "I" (note: the intercept term is not parenthesized here,
  # matching the original formula text). When dpar = TRUE coefficients are
  # prefixed with the parameter name in `y` and wrapped in brms::nlf().
  prefix <- if (dpar) y else ""
  rhs <- paste0(
    prefix, "A * exp(-", prefix, "B * (", x, " - ", prefix, "C)^2)"
  )
  if (int) {
    rhs <- paste0(prefix, "I + ", rhs)
  }
  pars <- paste0(prefix, LETTERS[if (int) c(1:3, 9) else 1:3])
  fml <- stats::as.formula(paste0(y, " ~ ", rhs))
  if (dpar) {
    fml <- brms::nlf(fml)
  }
  # Return the model formula and the names of its non-linear parameters.
  return(list(form = fml, pars = pars))
}
|
| 759 | ||
| 760 |
#' Helper function for brms formulas |
|
| 761 |
#' |
|
| 762 |
#' @keywords internal |
|
| 763 |
#' @noRd |
|
| 764 | ||
| 765 |
.brms_form_lorentz <- function(x, y, group, dpar = FALSE, nTimes = NULL,
                               useGroup = TRUE, prior = NULL, int) {
  # Lorentz dose-response formula: A / (1 + B * (x - C)^2), optionally offset
  # by an intercept "I" (the intercept term is not parenthesized, matching
  # the original formula text). When dpar = TRUE coefficients are prefixed
  # with the parameter name in `y` and wrapped in brms::nlf().
  prefix <- if (dpar) y else ""
  rhs <- paste0(
    prefix, "A / (1 + ", prefix, "B * (", x, " - ", prefix, "C)^2)"
  )
  if (int) {
    rhs <- paste0(prefix, "I + ", rhs)
  }
  pars <- paste0(prefix, LETTERS[if (int) c(1:3, 9) else 1:3])
  fml <- stats::as.formula(paste0(y, " ~ ", rhs))
  if (dpar) {
    fml <- brms::nlf(fml)
  }
  # Return the model formula and the names of its non-linear parameters.
  return(list(form = fml, pars = pars))
}
|
| 790 | ||
| 791 |
#' Helper function for brms formulas |
|
| 792 |
#' |
|
| 793 |
#' @keywords internal |
|
| 794 |
#' @noRd |
|
| 795 | ||
| 796 |
.brms_form_beta <- function(x, y, group, dpar = FALSE, nTimes = NULL,
                            useGroup = TRUE, prior = NULL, int) {
  # Beta dose-response formula with five coefficients (A, B, C, D, E),
  # optionally offset by an intercept "I". When dpar = TRUE coefficients are
  # prefixed with the parameter name in `y` and wrapped in brms::nlf().
  prefix <- if (dpar) y else ""
  # Small helper to prefix a single coefficient name.
  p <- function(par) paste0(prefix, par)
  rhs <- paste0(
    p("A"), " * (((", x, " - ", p("D"), ") / (", p("C"), " - ", p("D"), ")) * ((",
    p("E"), " - ", x, ") / (", p("E"), " - ", p("C"), ")) ^ ((",
    p("E"), " - ", p("C"), ") / (", p("C"), " - ", p("D"), "))) ^ ", p("B")
  )
  if (int) {
    rhs <- paste0(p("I"), " + ", rhs)
  }
  pars <- paste0(prefix, LETTERS[if (int) c(1:5, 9) else 1:5])
  fml <- stats::as.formula(paste0(y, " ~ ", rhs))
  if (dpar) {
    fml <- brms::nlf(fml)
  }
  # Return the model formula and the names of its non-linear parameters.
  return(list(form = fml, pars = pars))
}
| 1 |
#' Time conversion and plotting for bellwether data |
|
| 2 |
#' |
|
| 3 |
#' @param df Data frame to use, this can be in wide or long format. |
|
| 4 |
#' @param mode Any of "DAS", "DAP" or "DAE" (Days After Start, Days After Planting, and Days After Emergence).
|
| 5 |
#' Defaults to adding all columns. |
|
| 6 |
#' Note that if timeCol is not an integer then DAS is always returned. |
|
| 7 |
#' @param plantingDelay If `mode` includes "DAP" then `plantingDelay` is used to adjust "DAS" |
|
| 8 |
#' @param phenotype If `mode` includes "DAE" then this is the phenotype used to classify emergence. |
|
| 9 |
#' @param cutoff If `mode` includes "DAE" then this value is used to classify emergence.
|
| 10 |
#' Defaults to 1, meaning an image with a value of 1 or more for `phenotype` has "emerged". |
|
| 11 |
#' @param timeCol Column of input time values, defaults to "timestamp". |
|
| 12 |
#' If this is not an integer then it is assumed to be |
|
| 13 |
#' a timestamp in the format of the format argument. |
|
| 14 |
#' @param group Grouping variables to specify unique plants as a character vector. |
|
| 15 |
#' This defaults to "Barcodes". These taken together should identify a unique plant across time, |
|
| 16 |
#' although often "angle" or "rotation" should be added. |
|
| 17 |
#' @param plot Logical, should plots of the new time variables be printed? |
|
| 18 |
#' @param format An R POSIXct format, defaults to lemnatech standard format. |
|
| 19 |
#' This is only used if timeCol is not an integer. |
|
| 20 |
#' @param traitCol Column with phenotype names, defaults to "trait". |
|
| 21 |
#' This should generally not need to be changed from the default. |
|
| 22 |
#' If this and valueCol are present in colnames(df) then the data |
|
| 23 |
#' is assumed to be in long format. |
|
| 24 |
#' @param valueCol Column with phenotype values, defaults to "value". |
|
| 25 |
#' This should generally not need to be changed from the default. |
|
| 26 |
#' @param index Optionally a time to use as the beginning of the experiment. This |
|
| 27 |
#' may be useful if you have multiple datasets or you are adding data from bw.water |
|
| 28 |
#' and plants were watered before being imaged or if you want to index days off of |
|
| 29 |
#' midnight. This defaults to NULL but will take any value coercible to POSIXct by |
|
| 30 |
#' \code{as.POSIXct(... , tz="UTC")} such as "2020-01-01 18:30:00"
|
|
| 31 |
#' @keywords DAS time ggplot |
|
| 32 |
#' @import ggplot2 |
|
| 33 |
#' @return The input dataframe with new integer columns for different ways |
|
| 34 |
#' of describing time in the experiment. If plot is TRUE then a ggplot is also returned as part of |
|
| 35 |
#' a list. |
|
| 36 |
#' @export |
|
| 37 |
#' @examples |
|
| 38 |
#' \donttest{
|
|
| 39 |
#' sv <- read.pcv( |
|
| 40 |
#' "https://raw.githubusercontent.com/joshqsumner/pcvrTestData/main/pcv4-single-value-traits.csv", |
|
| 41 |
#' mode = "wide", reader = "fread" |
|
| 42 |
#' ) |
|
| 43 |
#' sv$genotype = substr(sv$barcode, 3, 5) |
|
| 44 |
#' sv$genotype = ifelse(sv$genotype == "002", "B73", |
|
| 45 |
#' ifelse(sv$genotype == "003", "W605S", |
|
| 46 |
#' ifelse(sv$genotype == "004", "MM", "Mo17") |
|
| 47 |
#' ) |
|
| 48 |
#' ) |
|
| 49 |
#' sv$fertilizer = substr(sv$barcode, 8, 8) |
|
| 50 |
#' sv$fertilizer = ifelse(sv$fertilizer == "A", "100", |
|
| 51 |
#' ifelse(sv$fertilizer == "B", "50", "0") |
|
| 52 |
#' ) |
|
| 53 |
#' sv <- bw.time(sv, |
|
| 54 |
#' plantingDelay = 0, phenotype = "area_pixels", cutoff = 10, |
|
| 55 |
#' timeCol = "timestamp", group = c("barcode", "rotation"), plot = FALSE
|
|
| 56 |
#' ) |
|
| 57 |
#' |
|
| 58 |
#' |
|
| 59 |
#' svl <- read.pcv( |
|
| 60 |
#' "https://raw.githubusercontent.com/joshqsumner/pcvrTestData/main/pcv4-single-value-traits.csv", |
|
| 61 |
#' mode = "long", reader = "fread" |
|
| 62 |
#' ) |
|
| 63 |
#' svl$genotype = substr(svl$barcode, 3, 5) |
|
| 64 |
#' svl$genotype = ifelse(svl$genotype == "002", "B73", |
|
| 65 |
#' ifelse(svl$genotype == "003", "W605S", |
|
| 66 |
#' ifelse(svl$genotype == "004", "MM", "Mo17") |
|
| 67 |
#' ) |
|
| 68 |
#' ) |
|
| 69 |
#' svl$fertilizer = substr(svl$barcode, 8, 8) |
|
| 70 |
#' svl$fertilizer = ifelse(svl$fertilizer == "A", "100", |
|
| 71 |
#' ifelse(svl$fertilizer == "B", "50", "0") |
|
| 72 |
#' ) |
|
| 73 |
#' svl <- bw.time(svl, |
|
| 74 |
#' plantingDelay = 0, phenotype = "area_pixels", cutoff = 10, timeCol = "timestamp", |
|
| 75 |
#' group = c("barcode", "rotation"), plot = FALSE
|
|
| 76 |
#' ) |
|
| 77 |
#' } |
|
| 78 |
#' |
|
| 79 |
bw.time <- function(df = NULL, mode = c("DAS", "DAP", "DAE"), plantingDelay = NULL,
                    phenotype = NULL, cutoff = 1, timeCol = "timestamp",
                    group = "Barcodes", plot = TRUE, format = "%Y-%m-%d %H:%M:%S",
                    traitCol = "trait", valueCol = "value", index = NULL) {
  # Long vs wide layout is detected from the presence of traitCol/valueCol.
  wide <- .detectWideData(df, traitCol, valueCol) # see bwoutliers
  # Drop requested time modes whose required inputs were not supplied.
  if ("DAP" %in% mode && is.null(plantingDelay)) {
    mode <- mode[mode != "DAP"]
  }
  if ("DAE" %in% mode && is.null(phenotype)) {
    mode <- mode[mode != "DAE"]
  }
  # Convert non-integer (timestamp) time columns into an integer DAS column;
  # integer time columns are passed through unchanged.
  timeRes <- .formatNonIntegerTime(df, timeCol, format, index)
  df <- timeRes[["data"]]
  timeCol <- timeRes[["timeCol"]]
  # Days After Planting is the (possibly converted) time shifted by the delay.
  if ("DAP" %in% mode) {
    df$DAP <- df[[timeCol]] + plantingDelay
  }
  # Days After Emergence is computed per unique plant from the phenotype.
  if ("DAE" %in% mode) {
    df <- .daeHelper(df, group, wide, phenotype, cutoff, timeCol, traitCol, valueCol)
  }
  rownames(df) <- NULL
  # Optionally bundle the data with diagnostic plots of each time variable.
  if (plot) {
    p <- .timePlottingHelper(df, phenotype, group, wide, mode, traitCol, valueCol)
    df <- list("data" = df, "plot" = p)
  }
  return(df)
}
|
| 110 | ||
| 111 |
#' *********************************************************************************************** |
|
| 112 |
#' *************** `fix non date times` **************************************** |
|
| 113 |
#' *********************************************************************************************** |
|
| 114 |
#' @description |
|
| 115 |
#' Internal function for time handling |
|
| 116 |
#' |
|
| 117 |
#' @keywords internal |
|
| 118 |
#' @noRd |
|
| 119 | ||
| 120 |
.formatNonIntegerTime <- function(df, timeCol, format, index) {
  # Integer time columns are assumed to already be day counts and pass
  # through unchanged; anything else is parsed as a timestamp and converted
  # to integer Days After Start ("DAS").
  if (!is.integer(df[[timeCol]])) {
    df[[timeCol]] <- as.POSIXct(strptime(df[[timeCol]], format = format))
    # Experiment origin: the supplied index, or else the earliest timestamp.
    # (Only parse `index` when it is actually provided.)
    if (is.null(index)) {
      beg <- min(df[[timeCol]], na.rm = TRUE)
    } else {
      beg <- as.POSIXct(index, tz = "UTC")
    }
    # Use difftime with explicit units: bare POSIXct subtraction chooses its
    # units automatically (secs/mins/hours/days depending on magnitude), so
    # the previous `/ 60 / 60 / 24` conversion was only correct when the
    # automatic unit happened to be seconds.
    df$DAS <- floor(as.numeric(difftime(df[[timeCol]], beg, units = "days")))
    timeCol <- "DAS"
  }
  return(list("data" = df, "timeCol" = timeCol))
}
|
| 132 | ||
| 133 | ||
| 134 |
#' *********************************************************************************************** |
|
| 135 |
#' *************** `time plotting Helper` **************************************** |
|
| 136 |
#' *********************************************************************************************** |
|
| 137 |
#' @description |
|
| 138 |
#' Internal function for plotting time data |
|
| 139 |
#' |
|
| 140 |
#' @keywords internal |
|
| 141 |
#' @noRd |
|
| 142 | ||
| 143 |
.timePlottingHelper <- function(df, phenotype, group, wide, mode, traitCol, valueCol) {
  # Returns one line plot per time mode (DAS/DAP/DAE) showing the phenotype
  # over time per plant. The wide and long branches differed only in how the
  # plotted data and y variable were chosen, so they are consolidated here
  # (the redundant `else if (!wide)` is also removed).
  if (wide) {
    plotDat <- df
    yvar <- phenotype
  } else {
    # Long data: keep only rows for the phenotype of interest and plot the
    # value column (the axis label still shows the phenotype name).
    plotDat <- df[df[[traitCol]] == phenotype, ]
    yvar <- valueCol
  }
  # One line per unique plant (interaction of the grouping columns).
  plotDat$plotGroup <- interaction(plotDat[, c(group)])
  p <- lapply(mode, function(m) {
    ggplot2::ggplot(plotDat, ggplot2::aes(
      x = .data[[m]], y = .data[[yvar]],
      group = .data$plotGroup
    )) +
      ggplot2::geom_line() +
      ggplot2::labs(x = m, y = phenotype, title = m) +
      pcv_theme()
  })
  return(p)
}
|
| 172 | ||
| 173 |
#' *********************************************************************************************** |
|
| 174 |
#' *************** `DAE Helper` **************************************** |
|
| 175 |
#' *********************************************************************************************** |
|
| 176 |
#' @description |
|
| 177 |
#' Internal function for dae calculation |
|
| 178 |
#' |
|
| 179 |
#' @keywords internal |
|
| 180 |
#' @noRd |
|
| 181 | ||
| 182 |
.daeHelper <- function(df, group, wide, phenotype, cutoff, timeCol, traitCol, valueCol) {
  # Compute Days After Emergence per unique plant: DAE is the time column
  # minus the first time at which the phenotype reached `cutoff`.
  plant_id <- interaction(df[, group])
  per_plant <- lapply(split(df, plant_id), function(d) {
    # Rows where this plant counts as "emerged", selected per data layout.
    if (wide) {
      emerged <- d[d[[phenotype]] >= cutoff & !is.na(d[[phenotype]]), ]
    } else {
      emerged <- d[d[[traitCol]] == phenotype & d[[valueCol]] >= cutoff & !is.na(d[[valueCol]]), ]
    }
    # A plant that never emerges gets an emergence time one past the global
    # maximum, so its DAE values are all negative.
    if (nrow(emerged) == 0) {
      emerged <- data.frame(DAS = max(df[[timeCol]]) + 1)
      colnames(emerged) <- timeCol
    }
    d$DAE <- d[[timeCol]] - min(emerged[[timeCol]], na.rm = TRUE)
    d
  })
  df <- do.call(rbind, per_plant)
  return(df)
}
| 1 |
#' @description |
|
| 2 |
#' Internal function for calculating the posterior distribution of the lambda parameter for Poisson data.
|
| 3 |
#' The conjugate prior on Lambda is a Gamma(A,B) |
|
| 4 |
#' A=B=0.5 is a reasonable weak default prior |
|
| 5 |
#' |
|
| 6 |
#' So if leaf count is Poisson distributed: |
|
| 7 |
#' count ~ Pois(lambda)
|
| 8 |
#' lambda ~ gamma(A, B)
|
| 9 |
#' A = A_[prior] + sum(x) |
|
| 10 |
#' B = B_[prior] + n
|
| 11 |
#' |
|
| 12 |
#' via MoM, lambda-hat = (1/n) * sum_i(x_i), i.e. the sample mean.
|
| 13 |
#' @param s1 A vector of counts (non-negative integers) assumed to be Poisson distributed.
|
| 14 |
#' @examples |
|
| 15 |
#' |
|
| 16 |
#' .conj_poisson_sv( |
|
| 17 |
#' s1 = rpois(20, 10), priors = list(a = c(0.5, 0.5), b = c(0.5, 0.5)), |
|
| 18 |
#' plot = FALSE |
|
| 19 |
#' ) |
|
| 20 |
#' |
|
| 21 |
#' @keywords internal |
|
| 22 |
#' @noRd |
|
| 23 | ||
| 24 |
.conj_poisson_sv <- function(s1 = NULL, priors = NULL,
                             plot = FALSE, support = NULL, cred.int.level = NULL,
                             calculatingSupport = FALSE) {
  #* `Check samples`
  # Poisson data must be non-negative whole numbers.
  if (any(abs(s1 - round(s1)) > .Machine$double.eps^0.5) || any(s1 < 0)) {
    stop("Only positive integers can be used in the Poisson distribution")
  }
  #* `make default prior if none provided`
  # Weak Gamma(0.5, 0.5) prior on lambda by default.
  if (is.null(priors)) {
    priors <- list(a = 0.5, b = 0.5) # gamma prior on lambda
  }
  out <- list()
  #* `Use conjugate gamma prior on lambda`
  # Conjugate update: shape gains the total count, rate gains the sample size.
  shape_post <- priors$a[1] + sum(s1)
  rate_post <- priors$b[1] + length(s1)
  #* `Define support if it is missing`
  # Early return used to establish a plotting/support range for the posterior.
  if (is.null(support) && calculatingSupport) {
    quantiles <- qgamma(c(0.0001, 0.9999), shape_post, rate_post)
    return(quantiles)
  }
  #* `calculate density over support`
  dens <- dgamma(support, shape_post, rate_post)
  pdf_vals <- dens / sum(dens)
  #* `calculate highest density interval`
  hdi_bounds <- qgamma(
    c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
    shape_post, rate_post
  )
  #* `calculate highest density estimate`
  hde_est <- .gammaHDE(shape = shape_post, scale = 1 / rate_post)
  #* `save summary and parameters`
  out$summary <- data.frame(HDE_1 = hde_est, HDI_1_low = hdi_bounds[1], HDI_1_high = hdi_bounds[2])
  out$posterior$a <- shape_post
  out$posterior$b <- rate_post
  #* `Make Posterior Draws`
  out$posteriorDraws <- rgamma(10000, shape_post, rate_post)
  out$pdf <- pdf_vals
  #* `keep data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support, "prob" = pdf_vals,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(out)
}
| 1 |
#' Function to visualize models made by \link{fitGrowth}.
|
|
| 2 |
#' |
|
| 3 |
#' Models fit using \link{growthSS} inputs by \link{fitGrowth}
|
|
| 4 |
#' (and similar models made through other means) can be visualized easily using this function. |
|
| 5 |
#' |
|
| 6 |
#' |
|
| 7 |
#' @param fit A model fit object (or a list of \code{nlrq} models) as returned by \code{fitGrowth}.
|
|
| 8 |
#' @param form A formula similar to that in \code{growthSS} inputs (or the \code{pcvrForm} part of the
|
|
| 9 |
#' output) specifying the outcome, predictor, and grouping structure of the data as |
|
| 10 |
#' \code{outcome ~ predictor|individual/group}. Generally this is given directly from
|
|
| 11 |
#' the growthSS output (\code{ss$pcvrForm}). If the formula does not include both individuals
|
|
| 12 |
#' and groups then lines from the data will not be plotted which may be best if your data does not |
|
| 13 |
#' specify unique individuals and your model does not include autocorrelation. |
|
| 14 |
#' @param groups An optional set of groups to keep in the plot. |
|
| 15 |
#' Defaults to NULL in which case all groups in the model are plotted. |
|
| 16 |
#' @param df A dataframe to use in plotting observed growth curves on top of the model and for making |
|
| 17 |
#' predictions. |
|
| 18 |
#' @param timeRange An optional range of times to use. This can be used to view predictions for |
|
| 19 |
#' future data if the available data has not reached some point (such as asymptotic size).
|
| 20 |
#' @param facetGroups logical, should groups be separated in facets? Defaults to TRUE. |
|
| 21 |
#' @param groupFill logical, should groups have different colors? Defaults to the opposite of |
|
| 22 |
#' facetGroups. If TRUE then |
|
| 23 |
#' viridis colormaps are used in the order c('plasma', 'mako', 'viridis', 'inferno', 'cividis', 'magma',
|
|
| 24 |
#' 'turbo', 'rocket'). Alternatively this can be given as a vector of |
|
| 25 |
#' viridis colormap names to use in a different order than above. |
|
| 26 |
#' Note that for brms models this is ignored except if used to specify a different viridis color map |
|
| 27 |
#' to use. |
|
| 28 |
#' @param hierarchy_value If a hierarchical model is being plotted, what value should the |
|
| 29 |
#' hiearchical predictor be? If left NULL (the default) the mean value is used. |
|
| 30 |
#' @keywords growth-curve |
|
| 31 |
#' @importFrom methods is |
|
| 32 |
#' @examples |
|
| 33 |
#' |
|
| 34 |
#' simdf <- growthSim("logistic",
|
|
| 35 |
#' n = 20, t = 25, |
|
| 36 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 37 |
#' ) |
|
| 38 |
#' ss <- growthSS( |
|
| 39 |
#' model = "logistic", form = y ~ time | id / group, |
|
| 40 |
#' df = simdf, type = "nls" |
|
| 41 |
#' ) |
|
| 42 |
#' fit <- fitGrowth(ss) |
|
| 43 |
#' growthPlot(fit, form = ss$pcvrForm, df = ss$df) |
|
| 44 |
#' |
|
| 45 |
#' @return Returns a ggplot showing a brms model's credible |
|
| 46 |
#' intervals and optionally the individual growth lines. |
|
| 47 |
#' |
|
| 48 |
#' @export |
|
| 49 | ||
| 50 |
growthPlot <- function(fit, form, groups = NULL, df = NULL, timeRange = NULL,
                       facetGroups = TRUE, groupFill = !facetGroups, hierarchy_value = NULL) {
  # groupFill may be logical, or a character vector of viridis map names to
  # use instead of (and in place of) the default ordering.
  if (is.logical(groupFill)) {
    virMaps <- c("plasma", "mako", "viridis", "inferno", "cividis", "magma", "turbo", "rocket")
  } else {
    virMaps <- groupFill
    groupFill <- TRUE
  }
  # Dispatch on the model class; lists of models (e.g. nlrq fits at several
  # taus) dispatch on their first element. Subset with [1] so objects that
  # carry multiple classes still yield a single "<class>Plot" function name
  # (consistent with the class(fit)[1] line above).
  model_class <- class(fit)[1]
  if (methods::is(fit, "list")) {
    model_class <- class(fit[[1]])[1]
  }
  if (methods::is(fit, "brmsfit")) {
    if (attr(fit$formula$formula, "nl")) { # non linear models are growth models
      plot <- brmPlot(
        fit = fit, form = form, groups = groups, df = df, timeRange = timeRange,
        facetGroups = facetGroups, hierarchy_value = hierarchy_value, vir_option = virMaps[1]
      )
    } else { # linear models are survival models
      plot <- brmSurvPlot(
        fit = fit, form = form, groups = groups, df = df, timeRange = timeRange,
        facetGroups = facetGroups
      )
    }
  } else {
    # Non-brms fits are routed to the class-specific helper (nlsPlot,
    # nlrqPlot, lmePlot, etc.) by name.
    plottingFunction <- match.fun(paste0(model_class, "Plot"))
    plot <- plottingFunction(
      fit = fit, form = form, groups = groups, df = df, timeRange = timeRange,
      facetGroups = facetGroups, groupFill = groupFill, virMaps
    )
  }
  return(plot)
}
| 1 |
#' Ease of use nlme starter function for standard growth model parameterizations |
|
| 2 |
#' |
|
| 3 |
#' Internal to growthSS |
|
| 4 |
#' |
|
| 5 |
#' @param model One of the 8 model options in growthSS |
|
| 6 |
#' @param form A pcvr style form, see growthSS |
|
| 7 |
#' @param sigma One of "int", "power", or "exp", which will correspond to varIdent, varPower, or varExp |
|
| 8 |
#' respectively. |
|
| 9 |
#' This can also take a varFunc object. |
|
| 10 |
#' @param df a dataframe to use to make the model. |
|
| 11 |
#' @param pars optional parameters to vary by group as fixed effects. |
|
| 12 |
#' @param start Starting values. These are optional unless model is a double sigmoid. |
|
| 13 |
#' For any other model these will be estimated from the data if left NULL. |
|
| 14 |
#' |
|
| 15 |
#' @examples |
|
| 16 |
#' |
|
| 17 |
#' simdf <- growthSim("logistic",
|
|
| 18 |
#' n = 20, t = 25, |
|
| 19 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 20 |
#' ) |
|
| 21 |
#' |
|
| 22 |
#' ss <- .nlmeSS( |
|
| 23 |
#' model = "logistic", form = y ~ time | id / group, |
|
| 24 |
#' sigma = "power", df = simdf, start = NULL |
|
| 25 |
#' ) |
|
| 26 |
#' |
|
| 27 |
#' ss <- .nlmeSS( |
|
| 28 |
#' model = "gam", form = y ~ time | id / group, |
|
| 29 |
#' sigma = "power", df = simdf, start = NULL |
|
| 30 |
#' ) |
|
| 31 |
#' |
|
| 32 |
#' dim(ss$df) |
|
| 33 |
#' ss[c("formula", "taus", "start", "pcvrForm")]
|
|
| 34 |
#' |
|
| 35 |
#' @import lmeSplines |
|
| 36 |
#' @importFrom nlme varPower varIdent varExp nlme nlme.formula corAR1 pdIdent |
|
| 37 |
#' @importFrom stats as.formula setNames |
|
| 38 |
#' @importFrom methods is |
|
| 39 |
#' |
|
| 40 |
#' @keywords internal |
|
| 41 |
#' @noRd |
|
| 42 | ||
| 43 | ||
| 44 |
.nlmeSS <- function(model, form, sigma, df, pars = NULL, start = NULL, int = FALSE) {
  # Assemble everything fitGrowth needs for an nlme model: formulas (model,
  # random, fixed, groups, weights, correlation), starting values, and the
  # prepared data frame. `int` adds an intercept parameter "I" to the model.
  #* `general steps`
  #* ***** `Define choices and make empty output list`
  out <- list()
  models <- c(
    "logistic", "gompertz", "monomolecular",
    "exponential", "linear", "power law",
    "double logistic", "double gompertz", "gam",
    "frechet", "weibull", "gumbel", "logarithmic", "bragg", "lorentz", "beta"
  )
  sigmas <- c("none", "int", "power", "exp")
  #* check if sigma is class "varFunc", if it is then return it as is?

  #* ***** `Make nlme model formula` *****
  #* `parse form argument`
  # .parsePcvrForm splits `y ~ x | individual / group` into its components
  # and returns the (possibly modified) data.
  parsed_form <- .parsePcvrForm(form, df)
  y <- parsed_form$y
  x <- parsed_form$x
  individual <- parsed_form$individual
  group <- parsed_form$group
  df <- parsed_form$data

  #* `make group a factor for nlme`
  df[[group]] <- as.factor(df[[group]])
  # Numeric label column mirrors the factor coding of the grouping variable.
  df[[paste0(group, "_numericLabel")]] <- unclass(df[[group]])
  #* `make an interaction variable for autocorrelation`
  #* Note that nlme does not allow random effects and correlations to apply at different "scales"
  #* so A,B,C can either vary by this interaction variable to have autocorrelation accurately modeled
  #* OR A,B,C can be estimated by group and autocorrelation can be by group. Currently this option is
  #* used. This note is kept here for reference.
  df[[paste0(group, individual)]] <- interaction(df[[group]], df[[individual]])

  #* `assemble growth formula with FE, RE, Groups, and Weights`
  # "decay" variants reuse the base model's formulas, negated later by
  # .nlmeDecay.
  if (grepl("decay", model)) {
    decay <- TRUE
    model <- trimws(gsub("decay", "", model))
  } else {
    decay <- FALSE
  }

  matched_model <- match.arg(model, models)

  # A character sigma is matched against the supported names; a varFunc
  # object is passed through to the form helper untouched.
  if (is.character(sigma)) {
    matched_sigma <- match.arg(sigma, sigmas)
  } else {
    matched_sigma <- sigma
  }

  # Dispatch to the per-model formula builder (e.g. .nlme_form_logistic) by
  # constructed name.
  stringFormFun <- paste0(".nlme_form_", gsub(" ", "", matched_model))
  form_fun <- match.fun(stringFormFun)
  if (matched_model == "gam") { # gam takes some extra work
    df$splines <- lmeSplines::smspline(as.formula(paste0("~ ", x)), data = df) # spline setup
    start <- 0 # no starting values (no parameters)
    pars <- df # there are no pars, this is just to pass df to pdIdent for the splines
  }
  growthFormList <- form_fun(x, y, group, individual, matched_sigma, pars, int)
  # The builder returns which parameters vary by group; strip that entry so
  # the remaining list is purely formulas.
  pars <- growthFormList$pars
  growthFormList <- growthFormList[!grepl("pars", names(growthFormList))]
  if (decay) {
    growthFormList <- .nlmeDecay(growthFormList)
  }

  #* `Make starting values`
  if (matched_model == "gam") {
    start <- 0
  }
  if (is.null(start)) {
    if (grepl("double", matched_model)) {
      warning(paste0(
        "Double Sigmoid models are not supported as self-starting models, ",
        "you will need to add starting parameters.",
        "Note for these models type='brms' is recommended."
      ))
      startingValues <- NULL
    } else {
      # Self-starting estimates via the model's .init* helper.
      stringInitFun <- paste0(".init", gsub(" ", "", matched_model))
      initFunction <- match.fun(stringInitFun)
      startingValues <- initFunction(df, x, y, int)
    }
    startingValuesList <- unlist(lapply(names(startingValues), function(nm) {
      val <- startingValues[nm]
      if (nm %in% pars) {
        rep(val, length(unique(df[[group]])))
        # if this is one of pars then make starting value per group
      } else {
        val
      } # else return one starting value
    }))
  } else {
    startingValuesList <- start
  }

  #* `return model components`

  out[["formula"]] <- growthFormList
  out[["start"]] <- startingValuesList
  out[["df"]] <- df
  out[["pcvrForm"]] <- form
  return(out)
}
|
| 144 | ||
| 145 | ||
| 146 | ||
| 147 |
#* `Sigma matching helper function` |
|
| 148 | ||
| 149 |
.nlme_sigma_form <- function(matched_sigma, x, group) {
  # Build the nlme variance (weights) specification for a model. `matched_sigma`
  # is either a character code ("none"/"int", "power", "exp") or a ready-made
  # nlme varFunc object, which is returned unchanged.
  # "dummyGroup" is the placeholder used when no grouping was supplied; drop
  # it so the variance formula has no conditioning term.
  if (group == "dummyGroup") {
    group <- NULL
  }
  #* `variance formula`
  if (methods::is(matched_sigma, "varFunc")) {
    weights_form <- matched_sigma
  } else if (matched_sigma %in% c("int", "none")) {
    # Constant variance, optionally stratified by group.
    weights_form <- nlme::varIdent(
      form = stats::as.formula(paste(c("~1", group), collapse = "|"))
    )
  } else if (matched_sigma == "power") {
    # Variance as a power of the time covariate.
    weights_form <- nlme::varPower(
      form = stats::as.formula(paste(c(paste0("~", x), group), collapse = "|"))
    )
  } else if (matched_sigma == "exp") {
    # Variance as an exponential of the time covariate.
    weights_form <- nlme::varExp(
      form = stats::as.formula(paste(c(paste0("~", x), group), collapse = "|"))
    )
  }
  # (A dead, empty `if (is.null(group))` block was removed here.)
  return(weights_form)
}
|
| 174 | ||
| 175 | ||
| 176 |
#* `Define growth formulas` |
|
| 177 | ||
| 178 |
.nlme_form_logistic <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  # Logistic growth A/(1 + exp((B - x)/C)); `int` prepends an intercept "I".
  total_pars <- c(if (int) "I", "A", "B", "C")
  rhs <- paste0("A/(1+exp((B-", x, ")/C))")
  if (int) {
    rhs <- paste0("I + (", rhs, ")")
  }
  model_form <- as.formula(paste0(y, " ~ ", rhs))
  #* `random effects formula`
  random_form <- as.formula(paste(paste(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  # By default every parameter is estimated per group; with no real grouping
  # every parameter collapses to a single fixed effect.
  if (is.null(pars)) {
    pars <- total_pars
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- ""
  }
  fixed_form <- lapply(total_pars, function(p) {
    rhs_fixed <- if (p %in% pars) paste0("0 + ", group) else "1"
    stats::as.formula(paste(p, "~", rhs_fixed))
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula`
  correlation_form <- nlme::corAR1(0.8, form = as.formula(paste0("~ 1 |", group)))

  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 217 | ||
| 218 |
# Build nlme formulas (model/random/fixed/groups/weights/correlation)
# for a gompertz growth model; `int` adds an intercept parameter I.
.nlme_form_gompertz <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  if (int) {
    total_pars <- c("I", "A", "B", "C")
    model_form <- stats::as.formula(paste0(y, " ~ I + (A*exp(-B*exp(-C*", x, ")))"))
  } else {
    total_pars <- c("A", "B", "C")
    model_form <- stats::as.formula(paste0(y, " ~ A*exp(-B*exp(-C*", x, "))"))
  }
  #* `random effects formula` (random intercept per parameter)
  random_form <- stats::as.formula(paste0(paste0(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  if (is.null(pars)) {
    pars <- total_pars # default: every parameter varies by group
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- "" # sentinel grouping: no parameter gets a group effect
  }
  fixed_form <- lapply(total_pars, function(par) {
    if (par %in% pars) {
      stats::as.formula(paste0(par, " ~ 0 + ", group))
    } else {
      stats::as.formula(paste0(par, " ~ 1"))
    }
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula` (AR1 within group)
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 257 | ||
| 258 |
# Build nlme formulas (model/random/fixed/groups/weights/correlation)
# for a double-logistic growth model; `int` adds an intercept parameter I.
.nlme_form_doublelogistic <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  if (int) {
    total_pars <- c("I", "A", "B", "C", "A2", "B2", "C2")
    model_form <- stats::as.formula(paste0(
      y, " ~ I + (A/(1+exp((B-", x,
      ")/C)) + ((A2-A) /(1+exp((B2-", x,
      ")/C2))))"
    ))
  } else {
    total_pars <- c("A", "B", "C", "A2", "B2", "C2")
    model_form <- stats::as.formula(paste0(
      y, " ~ A/(1+exp((B-", x,
      ")/C)) + ((A2-A) /(1+exp((B2-", x,
      ")/C2)))"
    ))
  }
  #* `random effects formula` (random intercept per parameter)
  random_form <- stats::as.formula(paste0(paste0(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  if (is.null(pars)) {
    pars <- total_pars # default: every parameter varies by group
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- "" # sentinel grouping: no parameter gets a group effect
  }
  fixed_form <- lapply(total_pars, function(par) {
    if (par %in% pars) {
      stats::as.formula(paste0(par, " ~ 0 + ", group))
    } else {
      stats::as.formula(paste0(par, " ~ 1"))
    }
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula` (AR1 within group)
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 305 | ||
| 306 |
# Build nlme formulas (model/random/fixed/groups/weights/correlation)
# for a double-gompertz growth model; `int` adds an intercept parameter I.
.nlme_form_doublegompertz <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  if (int) {
    total_pars <- c("I", "A", "B", "C", "A2", "B2", "C2")
    model_form <- stats::as.formula(paste0(
      y, " ~ I + (A * exp(-B * exp(-C*", x,
      ")) + (A2-A) * exp(-B2 * exp(-C2*(", x, "-B))))"
    ))
  } else {
    total_pars <- c("A", "B", "C", "A2", "B2", "C2")
    model_form <- stats::as.formula(paste0(
      y, " ~ A * exp(-B * exp(-C*", x,
      ")) + (A2-A) * exp(-B2 * exp(-C2*(", x, "-B)))"
    ))
  }
  #* `random effects formula` (random intercept per parameter)
  random_form <- stats::as.formula(paste0(paste0(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  if (is.null(pars)) {
    pars <- total_pars # default: every parameter varies by group
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- "" # sentinel grouping: no parameter gets a group effect
  }
  fixed_form <- lapply(total_pars, function(par) {
    if (par %in% pars) {
      stats::as.formula(paste0(par, " ~ 0 + ", group))
    } else {
      stats::as.formula(paste0(par, " ~ 1"))
    }
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula` (AR1 within group)
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 351 | ||
| 352 |
# Build nlme formulas (model/random/fixed/groups/weights/correlation)
# for a monomolecular growth model; `int` adds an intercept parameter I.
.nlme_form_monomolecular <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  if (int) {
    total_pars <- c("I", "A", "B")
    model_form <- stats::as.formula(paste0(y, "~I + (A-A*exp(-B*", x, "))"))
  } else {
    total_pars <- c("A", "B")
    model_form <- stats::as.formula(paste0(y, "~A-A*exp(-B*", x, ")"))
  }
  #* `random effects formula` (random intercept per parameter)
  random_form <- stats::as.formula(paste0(paste0(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  if (is.null(pars)) {
    pars <- total_pars # default: every parameter varies by group
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- "" # sentinel grouping: no parameter gets a group effect
  }
  fixed_form <- lapply(total_pars, function(par) {
    if (par %in% pars) {
      stats::as.formula(paste0(par, " ~ 0 + ", group))
    } else {
      stats::as.formula(paste0(par, " ~ 1"))
    }
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula` (AR1 within group)
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 391 | ||
| 392 |
# Build nlme formulas (model/random/fixed/groups/weights/correlation)
# for an exponential growth model; `int` adds an intercept parameter I.
.nlme_form_exponential <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  if (int) {
    total_pars <- c("I", "A", "B")
    model_form <- stats::as.formula(paste0(y, " ~ I + (A*exp(B*", x, "))"))
  } else {
    total_pars <- c("A", "B")
    model_form <- stats::as.formula(paste0(y, " ~ A*exp(B*", x, ")"))
  }
  #* `random effects formula` (random intercept per parameter)
  random_form <- stats::as.formula(paste0(paste0(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  if (is.null(pars)) {
    pars <- total_pars # default: every parameter varies by group
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- "" # sentinel grouping: no parameter gets a group effect
  }
  fixed_form <- lapply(total_pars, function(par) {
    if (par %in% pars) {
      stats::as.formula(paste0(par, " ~ 0 + ", group))
    } else {
      stats::as.formula(paste0(par, " ~ 1"))
    }
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula` (AR1 within group)
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 431 | ||
| 432 |
# Build nlme formulas (model/random/fixed/groups/weights/correlation)
# for a linear growth model; `int` adds an intercept parameter I.
.nlme_form_linear <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  if (int) {
    total_pars <- c("I", "A")
    model_form <- stats::as.formula(paste0(y, " ~ I + A*", x))
  } else {
    total_pars <- c("A")
    model_form <- stats::as.formula(paste0(y, " ~ A*", x))
  }
  #* `random effects formula` (random intercept per parameter)
  random_form <- stats::as.formula(paste0(paste0(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  if (is.null(pars)) {
    pars <- total_pars # default: every parameter varies by group
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- "" # sentinel grouping: no parameter gets a group effect
  }
  fixed_form <- lapply(total_pars, function(par) {
    if (par %in% pars) {
      stats::as.formula(paste0(par, " ~ 0 + ", group))
    } else {
      stats::as.formula(paste0(par, " ~ 1"))
    }
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula` (AR1 within group)
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 471 | ||
| 472 |
# Build nlme formulas (model/random/fixed/groups/weights/correlation)
# for a logarithmic growth model; `int` adds an intercept parameter I.
.nlme_form_logarithmic <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  if (int) {
    total_pars <- c("I", "A")
    model_form <- stats::as.formula(paste0(y, " ~ I + A*log(", x, ")"))
  } else {
    total_pars <- c("A")
    model_form <- stats::as.formula(paste0(y, " ~ A*log(", x, ")"))
  }
  #* `random effects formula` (random intercept per parameter)
  random_form <- stats::as.formula(paste0(paste0(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  if (is.null(pars)) {
    pars <- total_pars # default: every parameter varies by group
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- "" # sentinel grouping: no parameter gets a group effect
  }
  fixed_form <- lapply(total_pars, function(par) {
    if (par %in% pars) {
      stats::as.formula(paste0(par, " ~ 0 + ", group))
    } else {
      stats::as.formula(paste0(par, " ~ 1"))
    }
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula` (AR1 within group)
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 511 | ||
| 512 | ||
| 513 |
# Build nlme formulas (model/random/fixed/groups/weights/correlation)
# for a power-law growth model; `int` adds an intercept parameter I.
.nlme_form_powerlaw <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  if (int) {
    total_pars <- c("I", "A", "B")
    model_form <- stats::as.formula(paste0(y, " ~ I + (A*", x, "^B)"))
  } else {
    total_pars <- c("A", "B")
    model_form <- stats::as.formula(paste0(y, " ~ A*", x, "^B"))
  }
  #* `random effects formula` (random intercept per parameter)
  random_form <- stats::as.formula(paste0(paste0(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  if (is.null(pars)) {
    pars <- total_pars # default: every parameter varies by group
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- "" # sentinel grouping: no parameter gets a group effect
  }
  fixed_form <- lapply(total_pars, function(par) {
    if (par %in% pars) {
      stats::as.formula(paste0(par, " ~ 0 + ", group))
    } else {
      stats::as.formula(paste0(par, " ~ 1"))
    }
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula` (AR1 within group)
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 552 | ||
| 553 | ||
| 554 | ||
| 555 |
# Build nlme formulas for the spline (GAM-style) growth model option.
# NOTE(review): unlike the parametric .nlme_form_* helpers, `pars` here is
# passed as the `data` argument of pdIdent, so it presumably holds the data
# containing the spline basis columns -- confirm against the caller.
.nlme_form_gam <- function(x, y, group, individual, matched_sigma, pars, int) {
  # main model formula: y ~ x * group (linear in the spline terms)
  model_form <- as.formula(paste0(y, " ~", x, "*", group))
  #* `random effects formula`
  # identity penalty over the spline basis (no intercept), one block named by `group`
  random_form <- stats::setNames(list(nlme::pdIdent(~ splines - 1, data = pars)), group)
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula`
  #* nlme insists that correlation and RE formulas use the same grouping,
  #* so I will not be able to account for individual autocorrelation in the GAM option
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  # no "fixed"/"groups" entries: the GAM option uses the model formula directly
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 572 | ||
| 573 |
# Convert a growth formula list (from the .nlme_form_* helpers) to a decay
# model by negating the right-hand side of the model formula.
.nlmeDecay <- function(formList) {
  # as.character(formula) yields c("~", lhs, rhs); rebuild as "lhs ~ -(rhs)"
  pieces <- as.character(formList$model)
  formList$model <- as.formula(paste0(pieces[2], pieces[1], "-(", pieces[3], ")"))
  formList
}
|
| 579 | ||
| 580 | ||
| 581 | ||
| 582 |
# Build nlme formulas (model/random/fixed/groups/weights/correlation)
# for a weibull growth model; `int` adds an intercept parameter I.
.nlme_form_weibull <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  if (int) {
    total_pars <- c("I", "A", "B", "C")
    model_form <- stats::as.formula(paste0(y, " ~ I + (A * (1-exp(-(", x, "/C)^B)))"))
  } else {
    total_pars <- c("A", "B", "C")
    model_form <- stats::as.formula(paste0(y, " ~ A * (1-exp(-(", x, "/C)^B))"))
  }
  #* `random effects formula` (random intercept per parameter)
  random_form <- stats::as.formula(paste0(paste0(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  if (is.null(pars)) {
    pars <- total_pars # default: every parameter varies by group
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- "" # sentinel grouping: no parameter gets a group effect
  }
  fixed_form <- lapply(total_pars, function(par) {
    if (par %in% pars) {
      stats::as.formula(paste0(par, " ~ 0 + ", group))
    } else {
      stats::as.formula(paste0(par, " ~ 1"))
    }
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula` (AR1 within group)
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 621 | ||
| 622 |
# Build nlme formulas (model/random/fixed/groups/weights/correlation)
# for a frechet growth model; `int` adds an intercept parameter I.
.nlme_form_frechet <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  if (int) {
    total_pars <- c("I", "A", "B", "C")
    model_form <- stats::as.formula(paste0(y, " ~ I + (A * exp(-((", x, "-0)/C)^(-B)))"))
  } else {
    total_pars <- c("A", "B", "C")
    model_form <- stats::as.formula(paste0(y, " ~ A * exp(-((", x, "-0)/C)^(-B))"))
  }
  #* `random effects formula` (random intercept per parameter)
  random_form <- stats::as.formula(paste0(paste0(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  if (is.null(pars)) {
    pars <- total_pars # default: every parameter varies by group
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- "" # sentinel grouping: no parameter gets a group effect
  }
  fixed_form <- lapply(total_pars, function(par) {
    if (par %in% pars) {
      stats::as.formula(paste0(par, " ~ 0 + ", group))
    } else {
      stats::as.formula(paste0(par, " ~ 1"))
    }
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula` (AR1 within group)
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 661 | ||
| 662 | ||
| 663 |
# Build nlme formulas (model/random/fixed/groups/weights/correlation)
# for a gumbel growth model; `int` adds an intercept parameter I.
.nlme_form_gumbel <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  if (int) {
    total_pars <- c("I", "A", "B", "C")
    model_form <- stats::as.formula(paste0(y, " ~ I + (A * exp(-exp(-(", x, "-B)/C)))"))
  } else {
    total_pars <- c("A", "B", "C")
    model_form <- stats::as.formula(paste0(y, " ~ A * exp(-exp(-(", x, "-B)/C))"))
  }
  #* `random effects formula` (random intercept per parameter)
  random_form <- stats::as.formula(paste0(paste0(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  if (is.null(pars)) {
    pars <- total_pars # default: every parameter varies by group
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- "" # sentinel grouping: no parameter gets a group effect
  }
  fixed_form <- lapply(total_pars, function(par) {
    if (par %in% pars) {
      stats::as.formula(paste0(par, " ~ 0 + ", group))
    } else {
      stats::as.formula(paste0(par, " ~ 1"))
    }
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula` (AR1 within group)
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 702 | ||
| 703 |
# Build nlme formulas (model/random/fixed/groups/weights/correlation)
# for a bragg dose-response model; `int` adds an intercept parameter I.
.nlme_form_bragg <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  if (int) {
    total_pars <- c("I", "A", "B", "C")
    model_form <- stats::as.formula(paste0(y, " ~ I + A * exp(-B * (", x, " - C)^2)"))
  } else {
    total_pars <- c("A", "B", "C")
    model_form <- stats::as.formula(paste0(y, " ~ A * exp(-B * (", x, " - C)^2)"))
  }
  #* `random effects formula` (random intercept per parameter)
  random_form <- stats::as.formula(paste0(paste0(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  if (is.null(pars)) {
    pars <- total_pars # default: every parameter varies by group
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- "" # sentinel grouping: no parameter gets a group effect
  }
  fixed_form <- lapply(total_pars, function(par) {
    if (par %in% pars) {
      stats::as.formula(paste0(par, " ~ 0 + ", group))
    } else {
      stats::as.formula(paste0(par, " ~ 1"))
    }
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula` (AR1 within group)
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 742 | ||
| 743 |
# Build nlme formulas (model/random/fixed/groups/weights/correlation)
# for a lorentz dose-response model; `int` adds an intercept parameter I.
.nlme_form_lorentz <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  if (int) {
    total_pars <- c("I", "A", "B", "C")
    model_form <- stats::as.formula(paste0(y, " ~ I + A / (1 + B * (", x, " - C) ^ 2)"))
  } else {
    total_pars <- c("A", "B", "C")
    model_form <- stats::as.formula(paste0(y, " ~ A / (1 + B * (", x, " - C) ^ 2)"))
  }
  #* `random effects formula` (random intercept per parameter)
  random_form <- stats::as.formula(paste0(paste0(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  if (is.null(pars)) {
    pars <- total_pars # default: every parameter varies by group
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- "" # sentinel grouping: no parameter gets a group effect
  }
  fixed_form <- lapply(total_pars, function(par) {
    if (par %in% pars) {
      stats::as.formula(paste0(par, " ~ 0 + ", group))
    } else {
      stats::as.formula(paste0(par, " ~ 1"))
    }
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula` (AR1 within group)
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
|
| 782 | ||
| 783 |
# Build nlme formulas (model/random/fixed/groups/weights/correlation)
# for a beta dose-response model; `int` adds an intercept parameter I.
.nlme_form_beta <- function(x, y, group, individual, matched_sigma, pars, int) {
  #* `Define parameters and main growth formula`
  if (int) {
    total_pars <- c("I", "A", "B", "C", "D", "E")
    model_form <- stats::as.formula(paste0(
      y, " ~ I + A * (((", x, " - D) / (C - D)) * ((E - ", x,
      ") / (E - C)) ^ ((E - C) / (C - D))) ^ B"
    ))
  } else {
    total_pars <- c("A", "B", "C", "D", "E")
    model_form <- stats::as.formula(paste0(
      y, " ~ A * (((", x, " - D) / (C - D)) * ((E - ", x,
      ") / (E - C)) ^ ((E - C) / (C - D))) ^ B"
    ))
  }
  #* `random effects formula` (random intercept per parameter)
  random_form <- stats::as.formula(paste0(paste0(total_pars, collapse = " + "), "~ 1"))
  #* `fixed effects formula`
  if (is.null(pars)) {
    pars <- total_pars # default: every parameter varies by group
  }
  if (is.null(group) || group == "dummyGroup") {
    pars <- "" # sentinel grouping: no parameter gets a group effect
  }
  fixed_form <- lapply(total_pars, function(par) {
    if (par %in% pars) {
      stats::as.formula(paste0(par, " ~ 0 + ", group))
    } else {
      stats::as.formula(paste0(par, " ~ 1"))
    }
  })
  #* `groups formula`
  groups_form <- stats::as.formula(paste0("~", group))
  #* `variance formula`
  weights_form <- .nlme_sigma_form(matched_sigma, x, group)
  #* `correlation formula` (AR1 within group)
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 |", group)))
  formulas <- list(
    "model" = model_form, "random" = random_form,
    "fixed" = fixed_form, "groups" = groups_form,
    "weights" = weights_form, "cor_form" = correlation_form, "pars" = pars
  )
  return(formulas)
}
| 1 |
#' Check priors used in ease of use brms functions |
|
| 2 |
#' |
|
| 3 |
#' @param priors A named list of means for prior distributions. |
|
| 4 |
#' This takes the same input as the prior argument of \code{\link{growthSS}}.
|
|
| 5 |
#' Alternatively, if given the output of growthSS this will perform a prior predictive check
|
| 6 |
#' and return a plot from \code{\link{growthPlot}} of that check ignoring all other arguments.
|
|
| 7 |
#' Note that all priors must be |
|
| 8 |
#' proper in that case (non-flat) and the fit is likely to be strange looking due to how thick |
|
| 9 |
#' tailed the default priors from \code{\link{growthSS}} are.
|
|
| 10 |
#' @param type Either "density", the default, or a model as would be specified in \code{growthSS}
|
|
| 11 |
#' or \code{growthSim} such as "logistic", "gompertz", "monomolecular", "exponential",
|
|
| 12 |
#' "linear", "power law", "double logistic", or "double gompertz". |
|
| 13 |
#' If this is a model type then n draws from the prior will be simulated as growth |
|
| 14 |
#' trendlines and densities will be plotted on margins for some distributions. |
|
| 15 |
#' @param n Numeric, if type is a model then how many draws from the prior should be simulated? |
|
| 16 |
#' @param t Numeric, time passed to growthSim. Defaults to 25 (the growthSim default). |
|
| 17 |
#' @keywords Bayesian brms priors |
|
| 18 |
#' @return A named list of plots showing prior distributions that \code{growthSS} would use,
|
|
| 19 |
#' optionally with a plot of simulated growth curves using draws from those priors. |
|
| 20 |
#' @import ggplot2 |
|
| 21 |
#' @import patchwork |
|
| 22 |
#' @importFrom stats rlnorm dlnorm |
|
| 23 |
#' @examples |
|
| 24 |
#' |
|
| 25 |
#' set.seed(123) |
|
| 26 |
#' priors <- list("A" = c(100, 130), "B" = c(10, 8), "C" = c(0.2, 0.1))
|
|
| 27 |
#' plotPrior(priors) |
|
| 28 |
#' |
|
| 29 |
#' plotPrior(priors, "gompertz")[[1]] |
|
| 30 |
#' |
|
| 31 |
#' @export |
|
| 32 | ||
| 33 |
plotPrior <- function(priors, type = "density", n = 200, t = 25) {
  # If given growthSS output (identified by a "prior" element), delegate to the
  # prior-predictive check and return its plot, ignoring all other arguments.
  if ("prior" %in% names(priors)) {
    p <- .brms_prior_predictive(priors)
    return(p)
  }
  # One density plot per named prior: each prior element becomes a lognormal
  # density (meanlog = log(mean), sdlog = 0.25) drawn over a shared support.
  densPlots <- lapply(seq_along(priors), function(i) {
    pri <- priors[[i]]
    nm <- names(priors)[i]

    pri_df <- do.call(rbind, lapply(seq_along(pri), function(o) {
      prio <- pri[o]
      # upper limit of the support: slightly past the largest of 1000 draws
      # from the widest prior in this set, so all items share a scale
      max <- ceiling(max(rlnorm(1000, log(max(pri)), 0.25)) * 1.1)
      support <- seq(0, max, length.out = 10000)
      dens <- dlnorm(support, meanlog = log(prio), sdlog = 0.25)
      pdf <- dens / sum(dens) # normalize so the polygon areas are comparable
      data.frame(
        support = support,
        dens = pdf,
        param = nm,
        item = as.character(o)
      )
    }))

    ggplot2::ggplot(pri_df, ggplot2::aes(
      x = .data$support, y = .data$dens,
      fill = .data$item, group = .data$item
    )) +
      ggplot2::geom_polygon(alpha = 0.5) +
      ggplot2::theme_minimal() +
      ggplot2::labs(y = "Density", title = nm, fill = "Prior")
  })
  names(densPlots) <- names(priors)

  if (type == "density") {
    out <- densPlots
  } else {
    # `type` names a growth model: simulate n single-individual growth curves,
    # each from a fresh draw of the priors, and overlay them as trendlines.
    simdf <- do.call(rbind, lapply(1:n, function(i) {
      iter_params <- .prior_sampler(priors)
      x <- growthSim(model = type, n = 1, t = t, params = iter_params)
      x$id <- paste0("id_", i)
      x
    }))

    model_plot <- ggplot2::ggplot(
      simdf,
      ggplot2::aes(
        x = .data$time, y = .data$y, group = interaction(.data$id, .data$group),
        color = .data$group
      )
    ) +
      ggplot2::geom_line(linewidth = 0.1) +
      ggplot2::theme_minimal() +
      ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(linewidth = 5))) +
      ggplot2::labs(
        y = "Y", title = paste0(n, " curves simulated from prior draws"),
        color = "Prior"
      ) +
      ggplot2::theme(
        legend.position = "inside",
        legend.position.inside = c(0.9, 0.9)
      )

    # Map model parameters to margin axes for .plotPriorMarginPlots:
    # x = inflection-axis prior, y = asymptote prior, z = extra (rate) prior.
    # NULL means no margin density is drawn for that axis.
    if (type %in% c("logistic", "gompertz", "weibull", "frechet", "gumbel")) {
      x <- "B"
      y <- "A"
      z <- "C"
    } else if (type %in% c("monomolecular")) {
      y <- "A"
      x <- z <- NULL
    } else {
      x <- y <- z <- NULL
    }
    out <- .plotPriorMarginPlots(model_plot, densPlots, x, y, z)
  }

  return(out)
}
|
| 110 | ||
| 111 |
#' @description
#' Internal function for adding marginal plots to plotPrior
#' @param model_plot ggplot of simulated growth curves (made in plotPrior).
#' @param densPlots named list of per-parameter prior density ggplots (made in plotPrior).
#' @param x name in densPlots for the inflection-point parameter ("B"), or NULL.
#' @param y name in densPlots for the asymptote parameter ("A"), or NULL.
#' @param z name in densPlots for the growth-rate parameter ("C"), or NULL.
#' @return A list with "simulated" (possibly a patchwork of model plot + margins)
#' and "distributions" (the density plots; the solo model plot is appended when
#' a patchwork was assembled).
#' @keywords internal
#' @noRd

.plotPriorMarginPlots <- function(model_plot, densPlots, x, y, z) {
  # Axis ranges of the main plot, used to align the marginal density plots.
  xLims <- ggplot2::layer_scales(model_plot)$x$range$range
  yLims <- ggplot2::layer_scales(model_plot)$y$range$range
  # Keep an unmodified copy; if margins are added, this solo version is
  # appended to densPlots so the caller still has the plain curves plot.
  model_plot_solo <- model_plot
  # Count how many margins are built; 3 triggers the full patchwork layout.
  sum_non_null <- 0
  x_margin_plot <- NULL
  y_margin_plot <- NULL
  z_margin_plot <- NULL
  if (!is.null(y)) {
    # Asymptote prior: flipped and reversed so it reads as a right-hand
    # margin aligned with the main plot's y axis.
    y_margin_plot <- densPlots[[y]] +
      ggplot2::scale_y_reverse(position = "right") +
      ggplot2::scale_x_continuous(position = "top", limits = yLims) +
      ggplot2::labs(x = "Asymptote Prior") +
      ggplot2::coord_flip() +
      ggplot2::theme(
        plot.title = ggplot2::element_blank(), panel.grid = ggplot2::element_blank(),
        axis.text = ggplot2::element_blank(), axis.title.x = ggplot2::element_blank(),
        legend.position = "none"
      )
    sum_non_null <- sum_non_null + 1
  }
  if (!is.null(x)) {
    # Inflection-point prior: bottom margin sharing the main plot's x range.
    x_margin_plot <- densPlots[[x]] +
      ggplot2::labs(x = "Inflection Point Prior") +
      ggplot2::theme(
        plot.title = ggplot2::element_blank(), panel.grid = ggplot2::element_blank(),
        axis.text.y = ggplot2::element_blank(), axis.title.y = ggplot2::element_blank(),
        legend.position = "none"
      ) +
      ggplot2::coord_cartesian(xlim = xLims)
    sum_non_null <- sum_non_null + 1
  }
  if (!is.null(z)) {
    # Growth-rate prior: small corner panel, so fewer axis breaks.
    z_margin_plot <- densPlots[[z]] +
      ggplot2::labs(x = "Growth Rate Prior") +
      ggplot2::theme(
        plot.title = ggplot2::element_blank(), panel.grid = ggplot2::element_blank(),
        axis.text.y = ggplot2::element_blank(), axis.title.y = ggplot2::element_blank(),
        legend.position = "none"
      ) +
      ggplot2::scale_x_continuous(n.breaks = 3)
    sum_non_null <- sum_non_null + 1
  }
  if (sum_non_null == 3) {
    # 7x7 grid: main plot upper-left, x margin below, y margin right,
    # z (growth rate) prior in the corner.
    design <- c(
      patchwork::area(1, 1, 6, 6), # model plot
      patchwork::area(7, 1, 7, 6), # x margin
      patchwork::area(1, 7, 6, 7), # y margin
      patchwork::area(7, 7, 7, 7)
    ) # "z" margin
    layout <- patchwork::plot_layout(design = design)
    model_plot <- model_plot + x_margin_plot + y_margin_plot + z_margin_plot + layout
  } else if (!is.null(y_margin_plot) && is.null(x_margin_plot)) {
    # Only the asymptote margin exists (e.g. monomolecular): main plot + right margin.
    design <- c(
      patchwork::area(1, 1, 6, 6), # model plot
      patchwork::area(1, 7, 6, 7)
    ) # y margin
    model_plot <- model_plot + y_margin_plot + patchwork::plot_layout(design = design)
  }
  if (is(model_plot, "patchwork")) {
    # A patchwork was assembled; keep the stand-alone model plot available too.
    densPlots[[length(densPlots) + 1]] <- model_plot_solo
  }
  return(list("simulated" = model_plot, "distributions" = densPlots))
}
|
| 186 | ||
| 187 | ||
| 188 | ||
| 189 | ||
| 190 |
#' @description
#' Internal function for drawing one random value from each prior mean
#' @param priors priors as a list of numeric vectors (one mean per group level)
#' @return A list shaped like `priors` with one lognormal draw per mean.
#' @keywords internal
#' @noRd

.prior_sampler <- function(priors) {
  # For each parameter, draw one lognormal deviate per group-level mean.
  # Draws are centered at log(mean) with a fixed sdlog of 0.25, matching the
  # densities that plotPrior displays for these priors.
  lapply(priors, function(means) {
    vapply(means, function(m) {
      rlnorm(1, meanlog = log(m), sdlog = 0.25)
    }, numeric(1))
  })
}
|
| 203 | ||
| 204 |
#' @description
#' Internal function for sampling a growthSS model's priors only
#' @param priors a list returned by growthSS (formula, df, family, prior, call, pcvrForm)
#' @return A ggplot from growthPlot showing draws from the prior predictive distribution.
#' @keywords internal
#' @noRd

.brms_prior_predictive <- function(priors = NULL) {
  # Ask brms for the full set of default priors for this model, then keep any
  # defaults whose coef/dpar/nlpar combination the user did not already set.
  dp <- brms::get_prior(priors$formula, priors$df, priors$family)
  ssp <- priors$prior
  dpi <- as.character(interaction(dp$coef, dp$dpar, dp$nlpar))
  sspi <- as.character(interaction(ssp$coef, ssp$dpar, ssp$nlpar))
  # Logical subsetting instead of -which(): when no default matched a user
  # prior, the previous dp[-which(...), ] form was dp[-integer(0), ], which
  # selects ZERO rows and silently discarded every default prior.
  priors$prior <- rbind(ssp, dp[!(dpi %in% sspi), , drop = FALSE])
  tryCatch(
    {
      # sample_prior = "only" draws from the priors without conditioning on data.
      m <- suppressMessages(
        fitGrowth(priors,
          iter = 1000,
          chains = 1,
          cores = 1,
          sample_prior = "only",
          silent = 2
        )
      )
    },
    error = function(err) {
      # Sampling fails when any prior is improper (flat); explain, then rethrow.
      message(paste0(
        "Error trying to sample from priors distributions. ",
        "All priors must be proper (non-flat). Check prior specification in growthSS\n"
      ))
      message("The original Error message is:")
      stop(conditionMessage(err))
    }
  )
  # Plot the prior-predictive draws like any fitted growth model.
  p <- growthPlot(m, form = priors$pcvrForm, df = priors$df)
  return(p)
}
| 1 |
#' @description
#' Internal function for calculating \alpha and \beta of the gamma distributed variance of lognormal
#' data given an estimate of the lognormal \mu obtained via the method of moments using multi value
#' traits.
#' @param s1 A data.frame or matrix of multi value traits. The column names should include a number
#' representing the "bin".
#' @param priors Optional list with elements \code{a} and \code{b}, the shape and scale of the
#' gamma prior on the precision. Defaults to \code{list(a = 1, b = 1)}.
#' @param plot Logical, should a data.frame for plotting be included in the output?
#' @param support Numeric grid over which to evaluate the posterior density.
#' @param cred.int.level Credible interval level (e.g. 0.89).
#' @param calculatingSupport Logical; if TRUE and support is NULL, only the extreme
#' posterior quantiles are returned (used by the caller to build a shared support).
#'
#' @examples
#' mv_ln <- mvSim(
#'   dists = list(
#'     rlnorm = list(meanlog = log(130), sdlog = log(1.2))
#'   ),
#'   n_samples = 30
#' )
#' .conj_lognormal2_mv(
#'   s1 = mv_ln[, -1],
#'   priors = NULL,
#'   plot = FALSE,
#'   cred.int.level = 0.89
#' )
#' @keywords internal
#' @noRd

.conj_lognormal2_mv <- function(s1 = NULL, priors = NULL,
                                plot = FALSE, support = NULL, cred.int.level = NULL,
                                calculatingSupport = FALSE) {
  out <- list()
  #* `make default prior if none provided`
  if (is.null(priors)) {
    priors <- list(a = 1, b = 1) # prior on shape, scale of precision
  }
  #* `Reorder columns if they are not in the numeric order`
  # Bin columns are named like "x_1", "x_2", ...; strip the non-numeric prefix
  # to recover the bin value, then sort the columns by that value.
  histColsBin <- as.numeric(sub("[a-zA-Z_.]+", "", colnames(s1)))
  bins_order <- sort(histColsBin, index.return = TRUE)$ix
  s1 <- s1[, bins_order]
  #* `Loop over reps, get moments for each histogram`
  rep_distributions <- lapply(seq_len(nrow(s1)), function(i) {
    # Expand the histogram row into pseudo-observations: each bin value
    # repeated by its count in this row.
    X1 <- rep(histColsBin[bins_order], as.numeric(s1[i, ]))
    #* `Get mean of x1`
    x_bar <- mean(X1)
    # Method-of-moments style estimate of the lognormal meanlog.
    # NOTE(review): the textbook MoM estimator is log(m / sqrt(1 + v / m^2));
    # here the sqrt covers only var/mean^2 before the "+ 1" — confirm this
    # parenthesization is intentional (the _sv version uses the same form).
    mu_s1 <- log(x_bar / (sqrt(var(X1) / x_bar^2) + 1))
    #* `Update Gamma Distribution of precision`
    #* sufficient stats: n, ss
    ss <- nrow(s1) * mean((log(X1) - mu_s1)^2) # mean * nrow instead of sum for MV traits
    n1 <- nrow(s1)
    # Conjugate gamma update from the prior (a, b) and sufficient statistics.
    a_prime <- priors$a[1] + (n1 / 2)
    b_prime <- priors$b[1] + (ss / 2)
    return(list("a_prime" = a_prime, "b_prime" = b_prime, "ln_mu" = mu_s1))
  })
  #* `Unlist parameters`
  # Average the per-row posterior parameters into a single set of values.
  a_prime <- mean(unlist(lapply(rep_distributions, function(i) {
    i$a_prime
  })))
  b_prime <- mean(unlist(lapply(rep_distributions, function(i) {
    i$b_prime
  })))
  ln_mu_prime <- mean(unlist(lapply(rep_distributions, function(i) {
    i$ln_mu
  })))
  #* `Define support if it is missing`
  if (is.null(support) && calculatingSupport) {
    # Return only the extreme quantiles; the caller builds the support grid.
    # NOTE(review): b_prime is passed as a gamma `scale` here and below, but
    # the conjugate update above computes it like a rate — confirm intended.
    quantiles <- qgamma(c(0.0001, 0.9999), shape = a_prime, scale = b_prime)
    return(quantiles)
  }
  #* `posterior`
  dens1 <- dgamma(support, shape = a_prime, scale = b_prime)
  pdf1 <- dens1 / sum(dens1) # normalize over the discrete support grid
  hde1 <- .gammaHDE(shape = a_prime, scale = b_prime)
  # Equal-tailed quantile interval, reported in the HDI columns below.
  hdi1 <- qgamma(c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
    shape = a_prime, scale = b_prime
  )
  #* `Store summary`
  out$summary <- data.frame(HDE_1 = hde1, HDI_1_low = hdi1[1], HDI_1_high = hdi1[2])
  out$posterior$a <- a_prime
  out$posterior$b <- b_prime
  out$posterior$lognormal_mu <- ln_mu_prime # returning this as a number, not a distribution
  #* `Make Posterior Draws`
  out$posteriorDraws <- rgamma(10000, shape = a_prime, scale = b_prime)
  out$pdf <- pdf1
  #* `save s1 data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support,
      "prob" = pdf1,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(out)
}
|
| 90 | ||
| 91 | ||
| 92 | ||
| 93 |
#' @description
#' Internal function for calculating \alpha and \beta of the gamma distributed precision of lognormal
#' data given an estimate of the lognormal \mu obtained via the method of moments using single value
#' traits.
#'
#' @param s1 A vector of numerics drawn from a gaussian distribution.
#' @param priors Optional list with elements \code{a} and \code{b}, the shape and scale of the
#' gamma prior on the precision. Defaults to \code{list(a = 1, b = 1)}.
#' @param plot Logical, should a data.frame for plotting be included in the output?
#' @param support Numeric grid over which to evaluate the posterior density.
#' @param cred.int.level Credible interval level (e.g. 0.89).
#' @param calculatingSupport Logical; if TRUE and support is NULL, only the extreme
#' posterior quantiles are returned.
#' @examples
#' .conj_lognormal2_sv(
#'   s1 = rlnorm(100, log(130), log(1.3)),
#'   priors = NULL,
#'   plot = FALSE,
#'   cred.int.level = 0.89
#' )
#' @keywords internal
#' @noRd

.conj_lognormal2_sv <- function(s1 = NULL, priors = NULL,
                                plot = FALSE, support = NULL, cred.int.level = NULL,
                                calculatingSupport = FALSE) {
  out <- list()
  #* `make default prior if none provided`
  if (is.null(priors)) {
    priors <- list(a = 1, b = 1) # prior on shape, scale of precision
  }
  #* `Get mean of s1`
  x_bar <- mean(s1)
  # Method-of-moments style estimate of the lognormal meanlog.
  # NOTE(review): the textbook MoM estimator is log(m / sqrt(1 + v / m^2));
  # here the sqrt covers only var/mean^2 before the "+ 1" — confirm this
  # parenthesization is intentional (the _mv version uses the same form).
  mu_s1 <- log(x_bar / (sqrt(var(s1) / x_bar^2) + 1))
  #* `Update Gamma Distribution of precision`
  #* sufficient stats: n, ss
  # Sum of squared deviations of log-data from the estimated meanlog.
  ss <- sum((log(s1) - mu_s1)^2)
  n1 <- length(s1)
  # Conjugate gamma update from the prior (a, b) and sufficient statistics.
  a_prime <- priors$a[1] + (n1 / 2)
  b_prime <- priors$b[1] + (ss / 2)
  #* `Define support if it is missing`
  if (is.null(support) && calculatingSupport) {
    # Return only the extreme quantiles; the caller builds the support grid.
    # NOTE(review): b_prime is passed as a gamma `scale` here and below, but
    # the conjugate update above computes it like a rate — confirm intended.
    quantiles <- qgamma(c(0.0001, 0.9999), shape = a_prime, scale = b_prime)
    return(quantiles)
  }
  #* `posterior`
  dens1 <- dgamma(support, shape = a_prime, scale = b_prime)
  pdf1 <- dens1 / sum(dens1) # normalize over the discrete support grid
  hde1 <- .gammaHDE(shape = a_prime, scale = b_prime)
  # Equal-tailed quantile interval, reported in the HDI columns below.
  hdi1 <- qgamma(c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
    shape = a_prime, scale = b_prime
  )
  #* `Store summary`
  out$summary <- data.frame(HDE_1 = hde1, HDI_1_low = hdi1[1], HDI_1_high = hdi1[2])
  out$posterior$a <- a_prime
  out$posterior$b <- b_prime
  out$posterior$lognormal_mu <- mu_s1 # returning this as a number, not a distribution
  #* `Make Posterior Draws`
  out$posteriorDraws <- rgamma(10000, shape = a_prime, scale = b_prime)
  out$pdf <- pdf1
  #* `save s1 data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support,
      "prob" = pdf1,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(out)
}
| 1 |
#' Hypothesis testing for \link{fitGrowth} models.
|
|
| 2 |
#' |
|
| 3 |
#' @param ss A list output from \link{growthSS}. This is not required for nls, nlme, and brms models
|
|
| 4 |
#' if \code{test} is given in \code{brms::hypothesis} style as a written statement.
|
|
| 5 |
#' @param fit A model (or list of nlrq models) output from \link{fitGrowth}.
|
|
| 6 |
#' @param test A description of the hypothesis to test. This can take two main forms, |
|
| 7 |
#' either the parameter names to vary before comparing a nested model ("A", "B", "C") using an anova
|
|
| 8 |
#' or a hypothesis test/list of hypothesis tests written as character strings. |
|
| 9 |
#' The latter method is not implemented for \code{nlrq} models. If this is a vector of parameters
|
|
| 10 |
#' to test in the model then they should be parameters which vary by group in your original model |
|
| 11 |
#' and that you want to test against a null model where they do not vary by group. |
|
| 12 |
#' Alternatively for nlrq models this can be a comparison of model terms |
|
| 13 |
#' written as \code{"group_X|tau|par - group_Y|tau|par"}, which uses a fat tailed T distribution to make
|
|
| 14 |
#' comparisons on the means of each quantile estimate. For GAMs these tests compare the model with |
|
| 15 |
#' splines either by group or interacting with group to a model that ignores the grouping in the data. |
|
| 16 |
#' If this is a list of hypothesis tests then they should describe tests similar to |
|
| 17 |
#' "A.group1 - A.group2*1.1" and can be thought of as contrasts. For brms models the "test" argument |
|
| 18 |
#' is passed to brms::hypothesis, which has extensive documentation and is very flexible. |
|
| 19 |
#' Note that for survreg the \code{survival::survdiff} function is used so fewer hypothesis testing
|
|
| 20 |
#' options are available and flexsurv models are tested using contrasts via \code{flexsurv::standsurv}.
|
|
| 21 |
#' @keywords hypothesis brms nlme nls nlrq mgcv |
|
| 22 |
#' @importFrom stats getCall logLik pchisq anova as.formula setNames vcov coef pt |
|
| 23 |
#' @importFrom nlme pdIdent corAR1 fixef |
|
| 24 |
#' @importFrom extraDistr qlst dlst |
|
| 25 |
#' @importFrom methods is |
|
| 26 |
#' @importFrom car deltaMethod |
|
| 27 |
#' @importFrom survival survdiff |
|
| 28 |
#' |
|
| 29 |
#' @details |
|
| 30 |
#' For nls and nlme models an anova is run and returned as part of a list along with the null model. |
|
| 31 |
#' For nlrq models several assumptions are made and a likelihood ratio test for each tau |
|
| 32 |
#' is run and returned as a list. |
|
| 33 |
#' |
|
| 34 |
#' |
|
| 35 |
#' @return A list containing an anova object comparing non-linear growth models and the null model. |
|
| 36 |
#' |
|
| 37 |
#' @examples |
|
| 38 |
#' |
|
| 39 |
#' set.seed(123) |
|
| 40 |
#' simdf <- growthSim("logistic",
|
|
| 41 |
#' n = 20, t = 25, |
|
| 42 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 43 |
#' ) |
|
| 44 |
#' ss <- suppressMessages(growthSS( |
|
| 45 |
#' model = "logistic", form = y ~ time | id / group, |
|
| 46 |
#' df = simdf, type = "nlrq" |
|
| 47 |
#' )) |
|
| 48 |
#' fit <- fitGrowth(ss) |
|
| 49 |
#' testGrowth(ss, fit, "A") |
|
| 50 |
#' testGrowth(ss, fit, "a|0.5|A > b|0.5|A") |
|
| 51 |
#' |
|
| 52 |
#' ss2 <- suppressMessages(growthSS( |
|
| 53 |
#' model = "logistic", form = y ~ time | id / group, |
|
| 54 |
#' df = simdf, type = "nls" |
|
| 55 |
#' )) |
|
| 56 |
#' fit2 <- fitGrowth(ss2) |
|
| 57 |
#' testGrowth(ss2, fit2, "A")$anova |
|
| 58 |
#' coef(fit2) # check options for contrast testing |
|
| 59 |
#' testGrowth(ss2, fit2, "A1 - A2*1.1") |
|
| 60 |
#' |
|
| 61 |
#' @export |
|
| 62 | ||
| 63 |
testGrowth <- function(ss = NULL, fit, test = "A") {
  # Decide whether `test` names parameters (nested-model anova) or is a
  # written hypothesis/contrast statement.
  method <- .specifyTestType(ss, test)
  res <- NULL
  if (method == "contrast") {
    # Written hypotheses ("A1 - A2", "a|0.5|A > b|0.5|A", ...) are tested
    # directly against the fitted model's coefficients.
    res <- .nlhypothesis(fit, test, ss)
  } else if (method == "anova") {
    if (ss$model == "gam") {
      # GAMs compare the grouped-spline model to an ungrouped null model;
      # the refit-and-compare machinery differs per backend.
      res <- switch(ss$type,
        "nls" = .lmGamAnova(ss, fit),
        "nlrq" = .rqGamAnova(ss, fit),
        "nlme" = .lmeGamAnova(ss, fit),
        "mgcv" = .mgcvGamAnova(ss, fit)
      )
    } else if (ss$type %in% c("nls", "nlme")) {
      res <- .nlsAnova(ss, fit, test_pars = test)
    } else if (ss$type == "nlrq") {
      # nlrq anova-style tests only support the named growth parameters.
      if (any(test %in% c("A", "B", "C"))) {
        res <- .nlrqTest(ss, fit, test_pars = test)
      }
    } else if (grepl("surv", ss$type)) {
      res <- .survTest(ss)
    }
  }
  # Previously an unsupported type/test combination fell through and failed
  # with a cryptic "object 'res' not found"; raise an actionable error instead.
  if (is.null(res)) {
    stop(
      "testGrowth does not support this combination of model type and test, see ?testGrowth",
      call. = FALSE
    )
  }
  return(res)
}
|
| 92 | ||
| 93 |
#' Helper function to specify type of testing to do
#' @examples
#' .specifyTestType(list(type = "survreg"), "anything") # survival -> forced anova path
#' .specifyTestType(list(), "A") # parameter anova test
#' .specifyTestType(list(), "A1 > A2") # contrast test
#' @keywords internal
#' @noRd

.specifyTestType <- function(ss, test) {
  # Survival models only support the survdiff-style comparison, so the test
  # is replaced with a short placeholder to force the "anova" path.
  # any() keeps this safe when ss$type is NULL or empty (e.g. ss = list()),
  # where the previous bare grepl() result was logical(0) and `&&` errors on
  # zero-length operands in R >= 4.3.
  if (!is.null(ss) && any(grepl("surv", ss$type))) {
    test <- "S"
  }
  # Growth parameter names ("A", "B", "C", ...) are at most two characters;
  # anything longer is treated as a written contrast/hypothesis statement.
  if (all(unlist(lapply(test, nchar)) <= 2)) {
    method <- "anova"
  } else {
    method <- "contrast"
  }
  return(method)
}
|
| 112 | ||
| 113 |
#' (non)linear hypothesis function for nls and nlme models
#' @examples
#'
#' logistic_df <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' ss <- growthSS(
#'   model = "logistic", form = y ~ time | id / group,
#'   df = logistic_df, type = "nls"
#' )
#' fit <- fitGrowth(ss)
#' .nlhypothesis(fit, test = list("A1 - A2", "B1-B2"))
#' .nlhypothesis(fit, test = "A1 + 10 - A2 * 1.1")
#' .nlhypothesis(fit, test = "A1/B1 - A2/B2")
#'
#' @keywords internal
#' @noRd

.nlhypothesis <- function(fit, test, ss) {
  # Dispatch on model class. brms and nlrq fits have their own hypothesis
  # machinery and return early; nls/nlme fall through to the delta-method
  # contrast test below.
  if (methods::is(fit, "nlme")) {
    coefs <- nlme::fixef(fit)
  } else if (methods::is(fit, "nls")) {
    coefs <- stats::coef(fit)
  } else if (methods::is(fit, "brmsfit")) {
    out <- brms::hypothesis(fit, test)
    return(out)
  } else if (methods::is(fit, "nlrq") || is.list(fit)) {
    # A plain list of models here is a multi-tau nlrq fit.
    out <- .nlrqTest2(ss, fit, test)
    return(out)
  }
  # Residual degrees of freedom; may be NULL for some model classes, in
  # which case a Z test is used below.
  dfresid <- summary(fit)$df[2]
  vcMat <- stats::vcov(fit)
  # Each element of `test` is a written contrast such as "A1 - A2*1.1".
  hypotheses <- data.frame(form = unlist(test))
  colnames(hypotheses) <- c("Form")
  # Delta-method estimate and standard error for each contrast expression.
  val <- do.call(rbind, lapply(seq_len(nrow(hypotheses)), function(i) {
    car::deltaMethod(
      object = coefs, g = as.character(hypotheses$Form[i]),
      vcov. = vcMat, level = 0.95
    )
  }))
  row.names(val) <- seq_len(nrow(hypotheses))
  val <- cbind(hypotheses, val)
  lenVal <- ncol(val)
  # Drop the last two columns (the confidence-limit columns from deltaMethod).
  out <- val[, 1:(lenVal - 2)]
  # Two-sided test on each contrast: t when residual df is known, Z otherwise.
  out$"t-value" <- abs(out$Estimate / out$SE)
  residualDF <- ifelse(is.null(dfresid), Inf, dfresid)
  out$"p-value" <- 2 * pt(out$"t-value", residualDF, lower.tail = FALSE)
  if (residualDF == Inf) colnames(out)[length(colnames(out)) - 1] <- "Z-value"
  return(out)
}
|
| 164 | ||
| 165 | ||
| 166 | ||
| 167 |
#' mgcv gam testing function
#' @examples
#'
#' logistic_df <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' ss <- growthSS(
#'   model = "gam", form = y ~ time | id / group,
#'   df = logistic_df, type = "mgcv"
#' )
#' fit <- fitGrowth(ss)
#' .mgcvGamAnova(ss, fit)$anova
#'
#' @keywords internal
#' @noRd

.mgcvGamAnova <- function(ss, fit) {
  #* `Get x variable`
  # The RHS contains a smooth term like "s(time, by = group)"; pull out the
  # x variable name from inside "s(...,".
  form_chr <- as.character(ss$formula)
  RHS <- form_chr[3]
  x <- sub(",", "", sub("s\\(", "", regmatches(RHS, regexpr("s\\(.*,", RHS))))
  ssNew <- ss
  #* ***** `Make Null formula`
  # Null model: a single ungrouped smooth of x. Use the model's own response
  # term (form_chr[2]) rather than a hard-coded "y" so this matches
  # .lmGamAnova/.rqGamAnova and works for any outcome name.
  ssNew$formula <- stats::as.formula(paste0(form_chr[2], " ~ s(", x, ")"))
  #* `rerun fitGrowth with new formula`
  nullMod <- fitGrowth(ssNew)
  #* `compare models and return values`
  anv <- stats::anova(nullMod, fit, test = "F")
  out <- list("anova" = anv, "nullMod" = nullMod)
  return(out)
}
|
| 198 | ||
| 199 | ||
| 200 | ||
| 201 |
#' lme gam testing function
#' @examples
#'
#' logistic_df <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' ss <- growthSS(
#'   model = "gam", form = y ~ time | id / group,
#'   df = logistic_df, type = "nlme"
#' )
#' fit <- fitGrowth(ss)
#' .lmeGamAnova(ss, fit)$anova
#'
#' @keywords internal
#' @noRd

.lmeGamAnova <- function(ss, fit) {
  #* `Get x variable`
  # The model formula RHS is like "time * group"; keep only the x term.
  x <- trimws(sub("\\*.*", "", as.character(ss$formula$model)[3]))
  ssNew <- ss
  #* ***** `Make Null formulas`
  # The null model ignores grouping: a constant "dummy" grouping column makes
  # every row belong to one group while keeping the nlme formula machinery intact.
  newdf <- ss$df
  newdf$dummy <- "A"
  model_form <- ss$formula$model # fixed effects should be constant for likelihood testing
  #* random effects formula
  random_form <- stats::setNames(list(nlme::pdIdent(~ splines - 1, data = newdf)), "dummy")
  #* variance formula
  # Rebuild the within-group variance structure against the dummy grouping.
  weights_form <- .nlme_sigma_form(as.list(ss$call)$sigma, x, "dummy")
  #* correlation formula
  correlation_form <- nlme::corAR1(0.8, form = stats::as.formula(paste0("~ 1 | dummy")))
  #* add all to newSS as list
  ssNew$formula <- list(
    "model" = model_form, "random" = random_form,
    "weights" = weights_form, "cor_form" = correlation_form
  )
  ssNew$df <- newdf
  #* `rerun fitGrowth with new formula`
  nullMod <- fitGrowth(ssNew)
  #* `compare models and return values`
  anv <- stats::anova(nullMod, fit)
  out <- list("anova" = anv, "nullMod" = nullMod)
  return(out)
}
|
| 245 | ||
| 246 | ||
| 247 |
#' rq gam testing function for multiple taus
#' @examples
#'
#' logistic_df <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' ss <- growthSS(
#'   model = "gam", form = y ~ time | id / group,
#'   df = logistic_df, type = "nlrq", tau = c(0.25, 0.5, 0.75)
#' )
#' fit <- fitGrowth(ss, cores = 3)
#' .rqGamAnova(ss, fit)$"0.25"$anova
#' .rqGamAnova(ss, fit[[1]])$anova
#'
#' @keywords internal
#' @noRd

.rqGamAnova <- function(ss, fit) {
  # A single rq model is promoted to a one-element named list so the per-tau
  # loop below handles both single- and multi-tau fits uniformly.
  if (methods::is(fit, "rq")) {
    fit <- stats::setNames(list(fit), as.character(fit$tau))
  }
  # Build the null (ungrouped) formula once; it is identical for every tau.
  lhs <- as.character(ss$formula)[2]
  ungrouped_rhs <- trimws(sub("\\*.*", "", as.character(ss$formula)[3]))
  null_formula <- stats::as.formula(paste0(lhs, "~", ungrouped_rhs))
  tau_names <- names(fit)
  comparisons <- lapply(tau_names, function(tau_chr) {
    grouped_mod <- fit[[tau_chr]]
    # Refit at this tau with the grouping removed to obtain the null model.
    null_ss <- ss
    null_ss$formula <- null_formula
    null_ss$taus <- as.numeric(tau_chr)
    null_mod <- fitGrowth(null_ss)
    # Compare the null and grouped fits for this tau.
    list("anova" = stats::anova(null_mod, grouped_mod), "nullMod" = null_mod)
  })
  names(comparisons) <- tau_names
  # With a single tau, unwrap so callers get the comparison list directly.
  if (length(comparisons) == 1) {
    comparisons <- comparisons[[1]]
  }
  return(comparisons)
}
|
| 294 | ||
| 295 |
#' lm gam testing function
#' @examples
#'
#' logistic_df <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' ss <- growthSS(
#'   model = "gam", form = y ~ time | id / group,
#'   df = logistic_df, type = "nls"
#' )
#' fit <- fitGrowth(ss)
#' .lmGamAnova(ss, fit)$anova
#'
#' @keywords internal
#' @noRd

.lmGamAnova <- function(ss, fit) {
  # Null model: drop the "* group" interaction from the RHS so the spline
  # ignores grouping, then refit and compare with an anova.
  form_chr <- as.character(ss$formula)
  ungrouped_rhs <- trimws(sub("\\*.*", "", form_chr[3]))
  null_ss <- ss
  null_ss$formula <- stats::as.formula(paste0(form_chr[2], "~", ungrouped_rhs))
  null_mod <- fitGrowth(null_ss)
  return(list("anova" = stats::anova(null_mod, fit), "nullMod" = null_mod))
}
|
| 327 | ||
| 328 | ||
| 329 |
#' nls and nlme testing function
#' @examples
#'
#' logistic_df <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' ss <- growthSS(
#'   model = "logistic", form = y ~ time | id / group,
#'   df = logistic_df, type = "nls"
#' )
#' fit <- fitGrowth(ss)
#' .nlsAnova(ss, fit, test_pars = "A")$anova
#' ss <- growthSS(
#'   model = "logistic", form = y ~ time | id / group,
#'   df = logistic_df, type = "nlme"
#' )
#' fit <- fitGrowth(ss)
#' .nlsAnova(ss, fit, test_pars = "A")$anova
#'
#' @keywords internal
#' @noRd

.nlsAnova <- function(ss, fit, test_pars = "A") {
  # Find parameters that were indexed by group in the model formula; they
  # show up as "X[" (optionally "X2[") tokens on the RHS.
  rhs <- as.character(ss$formula)[3]
  par_hits <- regmatches(rhs, gregexpr(".2?\\[", rhs))[[1]]
  grouped_pars <- sub("\\[", "", par_hits)
  # The null model keeps grouping only on the parameters NOT under test.
  keep_pars <- grouped_pars[!grouped_pars %in% test_pars]
  # Rebuild the original growthSS call with the reduced parameter set.
  call_args <- as.list(ss$call)
  call_args$pars <- keep_pars
  call_args$df <- ss$df
  null_ss <- suppressMessages(eval(as.call(call_args)))
  # Refit and compare against the full model.
  null_mod <- fitGrowth(null_ss)
  return(list("anova" = stats::anova(null_mod, fit), "nullMod" = null_mod))
}
|
| 371 | ||
| 372 |
#' pseudo LRT function for nlrq
#' @examples
#'
#' logistic_df <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' ss <- growthSS(
#'   model = "logistic", form = y ~ time | id / group,
#'   df = logistic_df, type = "nlrq", tau = c(0.25, 0.5, 0.75)
#' )
#' fit <- fitGrowth(ss)
#' .nlrqTest(ss, fit, test_pars = "A")$anova
#'
#' @keywords internal
#' @noRd

.nlrqTest <- function(ss, fit, test_pars = "A") {
  # Identify group-indexed parameters on the formula RHS ("X[" tokens,
  # optionally "X2[" for double-sigmoid style parameters).
  rhs <- as.character(ss$formula)[3]
  grouped_pars <- sub("\\[", "", regmatches(rhs, gregexpr(".2?\\[", rhs))[[1]])
  # Null model keeps grouping only on the parameters not being tested.
  null_pars <- grouped_pars[!grouped_pars %in% test_pars]
  # Rebuild the growthSS call with the reduced parameter set and refit.
  call_args <- as.list(ss$call)
  call_args$pars <- null_pars
  call_args$df <- ss$df
  null_ss <- suppressMessages(eval(as.call(call_args)))
  null_fits <- fitGrowth(null_ss)
  # A single-tau fit returns bare nlrq models; wrap them in lists named by
  # tau so the per-tau pairing below is uniform with the multi-tau case.
  if (methods::is(fit, "nlrq")) {
    fit <- stats::setNames(list(fit), ss$taus)
    null_fits <- stats::setNames(list(null_fits), ss$taus)
  }
  # Pair each full model with its null counterpart (full model FIRST, as
  # .nlrqPseudoLRT expects) and run the pseudo likelihood ratio test per tau.
  res <- lapply(ss$taus, function(tau) {
    .nlrqPseudoLRT(list(fit[[paste0(tau)]], null_fits[[paste0(tau)]]))
  })
  names(res) <- ss$taus
  return(res)
}
|
| 420 | ||
| 421 | ||
| 422 |
#' pseudo LRT function for nlrq, aiming to use empirical likelihood given more time
#' @param nested_models list of sequentially nested models with the same tau
#' generated by .nlrqTest (any objects supporting stats::logLik and stats::getCall).
#' @return An "anova" classed data.frame with one row per model: parameter
#' counts, log likelihoods, and a chi-squared LRT for each adjacent pair.
#' @keywords internal
#' @noRd

.nlrqPseudoLRT <- function(nested_models) {
  # Generalized to any number of sequentially nested models (the previous
  # version hard-coded 2 rows despite anticipating more models).
  nModels <- length(nested_models)
  rval <- matrix(NA_real_, nrow = nModels, ncol = 5)
  colnames(rval) <- c("#Df", "LogLik", "Df", "Chisq", "Pr(>Chisq)")
  rownames(rval) <- seq_len(nModels)
  logL <- lapply(nested_models, stats::logLik)
  # Parameter count and log likelihood per model (vapply guarantees numerics).
  rval[, 1] <- vapply(logL, function(x) as.numeric(attr(x, "df")), numeric(1))
  rval[, 2] <- vapply(logL, as.numeric, numeric(1))
  # For each adjacent pair: df change and the LRT statistic 2 * |delta logLik|.
  rval[2:nModels, 3] <- rval[2:nModels, 1] - rval[1:(nModels - 1), 1]
  rval[2:nModels, 4] <- 2 * abs(rval[2:nModels, 2] - rval[1:(nModels - 1), 2])
  rval[, 5] <- stats::pchisq(rval[, 4], round(abs(rval[, 3])), lower.tail = FALSE)
  # Header listing each model's formula, mirroring stats::anova printing.
  variables <- lapply(nested_models, function(x) {
    deparse(as.list(stats::getCall(x))$formula)
  })
  header <- paste("Model ", format(seq_len(nModels)), ": ", variables, sep = "", collapse = "\n")
  out <- structure(as.data.frame(rval),
    heading = header,
    class = c("anova", "data.frame")
  )
  return(out)
}
|
| 449 | ||
| 450 | ||
| 451 |
#' Parameterized test for nlrq models using SE
#'
#' Compares two fitted parameters (possibly from models at different taus and/or
#' for different groups) by building scaled-t distributions from each parameter's
#' estimate and standard error, then computing the probability that one exceeds
#' the other via .post.prob.from.pdfs.
#' @examples
#' logistic_df <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' ss <- growthSS(
#'   model = "logistic", form = y ~ time | id / group,
#'   df = logistic_df, type = "nlrq", tau = c(0.25, 0.5, 0.75)
#' )
#' fit <- fitGrowth(ss)
#' .nlrqTest2(ss, fit, test_pars = "a|0.5|A > b|0.5|A")
#' @keywords internal
#' @noRd
.nlrqTest2 <- function(ss, fit, test_pars = "a|0.5|A > b|0.5|A") {
  #* `Get parameters to vary in comparison model`
  # third element of the character-coerced formula is the RHS of the model
  xForm <- as.character(ss$formula)[3]
  #* `Get name of grouping variable in formula`
  # the bracketed term (e.g. "[group]") in the RHS names the grouping variable
  rMatches2 <- regexpr("\\[[a-zA-Z0-9.]*\\]", xForm)
  groupVar <- sub("\\]", "", sub("\\[", "", regmatches(xForm, rMatches2)))
  #* `parse test_pars formula`
  # each side of the ">"/"<" becomes a named vector c(group, tau, par)
  split_form <- lapply(strsplit(test_pars, ">|<")[[1]], function(s) {
    stats::setNames(strsplit(trimws(s), "\\|")[[1]], c("group", "tau", "par"))
  })
  direction <- ifelse(grepl(">", test_pars), "greater", "lesser")
  #* `select models based on tau`
  if (!methods::is(fit, "nlrq")) {
    # list of fits named by tau: pick the model matching each side's tau
    fit1 <- fit[[split_form[[1]]["tau"]]]
    fit2 <- fit[[split_form[[2]]["tau"]]]
    fits <- list(fit1, fit2)
  } else {
    # single fit: both sides of the comparison come from the same model
    fits <- list(fit, fit)
  }
  #* `get model data for each group in formula`
  # builds one row per side of the comparison, in the order given by test_pars;
  # downstream indexing (mdf[1, ], mdf[2, ]) relies on this order
  mdf <- do.call(rbind, lapply(seq_along(split_form), function(i) {
    sf <- split_form[[i]]
    #* `get model parameters`
    mdf <- as.data.frame(summary(fits[[i]])$coefficients)
    #* `replace 1-nGroups with sorted group names`
    # coefficient rownames look like "A1", "B2", ...: parameter letter + group index
    mdf$par <- substr(rownames(mdf), 1, 1)
    mdf$numericGroup <- sub("^[A-C]", "", rownames(mdf))

    gNames <- stats::setNames(
      data.frame(sort(unique(ss$df[[groupVar]])), seq_along(unique(ss$df[[groupVar]]))),
      c(groupVar, "numericGroup")
    )
    mdf <- merge(mdf, gNames, by = "numericGroup")
    # NOTE(review): the selected "group" column assumes groupVar is literally
    # named "group" — verify against the formulas .nlrqSS generates.
    return(mdf[
      mdf[[groupVar]] == sf[["group"]] & mdf[["par"]] == sf[["par"]],
      c("Value", "Std. Error", "t value", "Pr(>|t|)", "par", "group")
    ])
  }))
  #* `make T distributions for comparisons`
  # shared support spanning the 0.01%-99.99% quantiles of both scaled-t
  # distributions (df = 5, centered on each estimate, scaled by its SE)
  support1 <- extraDistr::qlst(c(0.0001, 0.9999),
    df = 5, mu = mdf[1, "Value"],
    sigma = mdf[1, "Std. Error"]
  )
  support2 <- extraDistr::qlst(c(0.0001, 0.9999),
    df = 5, mu = mdf[2, "Value"],
    sigma = mdf[2, "Std. Error"]
  )
  mn <- 0.99 * min(c(support1, support2))
  mx <- 1.01 * max(c(support1, support2))
  support <- seq(mn, mx, length.out = 10000)
  t1 <- extraDistr::dlst(support, df = 5, mu = mdf[1, "Value"], sigma = mdf[1, "Std. Error"])
  t2 <- extraDistr::dlst(support, df = 5, mu = mdf[2, "Value"], sigma = mdf[2, "Std. Error"])
  # normalize densities into discrete pdfs over the shared support
  pdf1 <- t1 / sum(t1)
  pdf2 <- t2 / sum(t2)
  res <- .post.prob.from.pdfs(pdf1, pdf2, direction)$post.prob
  # probability attaches to the first row; second row padded with NA
  mdf[[paste0("prob.", direction)]] <- c(res, NA)
  return(mdf)
}
|
| 524 | ||
| 525 | ||
| 526 |
#' Chisq based test for survreg and flexsurv models using SE
#'
#' Dispatches on `ss$type`: survreg fits get a log-rank style test via
#' survival::survdiff; flexsurv fits get standardized survival contrasts between
#' groups via flexsurv::standsurv.
#' @examples
#'
#'
#' df <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' ss <- growthSS("survival weibull", y > 100 ~ time | id / group, df = df, type = "survreg")
#' fit <- fitGrowth(ss)
#' .survTest(ss, fit)
#'
#' ss <- growthSS("survival weibull", y > 100 ~ time | id / group, df = df, type = "flexsurv")
#' fit <- fitGrowth(ss)
#' .survTest(ss, fit)
#'
#' @keywords internal
#' @noRd
.survTest <- function(ss = NULL, fit = NULL) {
  if (ss$type == "survreg") {
    return(survival::survdiff(formula = ss$formula, data = ss$df))
  }
  if (ss$type == "flexsurv") {
    # the last element after splitting the RHS on "|" and "/" is the group
    rhs <- as.character(ss$pcvrForm)[3]
    rhs_parts <- trimws(strsplit(rhs, "[|]|[/]")[[1]])
    group_var <- rhs_parts[length(rhs_parts)]
    group_levels <- unique(ss$df[[group_var]])
    # one standsurv "at" entry per group level
    at_list <- lapply(group_levels, function(lvl) stats::setNames(list(lvl), group_var))
    return(as.data.frame(
      flexsurv::standsurv(fit, at = at_list, contrast = "difference", se = TRUE, ci = TRUE)
    ))
  }
}
| 1 |
#' Function to visualize \code{quantreg::rq} general additive growth models.
#'
#' Models fit using \link{growthSS} inputs by \link{fitGrowth}
#' (and similar models made through other means) can be visualized easily using this function.
#' This will generally be called by \code{growthPlot}.
#'
#' @param fit A model fit, or list of model fits, returned by \code{fitGrowth}
#' with type="nlrq" and model="gam".
#' @param form A formula similar to that in \code{growthSS} inputs (or the \code{pcvrForm}
#' part of the output) specifying the outcome,
#' predictor, and grouping structure of the data as \code{outcome ~ predictor|individual/group}.
#' If the individual and group are specified then the observed growth lines are plotted.
#' @param df A dataframe to use in plotting observed growth curves on top of the model.
#' This must be supplied for rq models.
#' @param groups An optional set of groups to keep in the plot.
#' Defaults to NULL in which case all groups in the model are plotted.
#' @param timeRange An optional range of times to use. This can be used to view predictions for
#' future data if the available data has not reached some point (such as asymptotic size).
#' @param facetGroups logical, should groups be separated in facets? Defaults to TRUE.
#' @param groupFill logical, should groups have different colors? Defaults to FALSE.
#' If TRUE then viridis colormaps are used in the order of virMaps
#' @param virMaps order of viridis maps to use. Will be recycled to necessary length.
#' Defaults to "plasma", but will generally be informed by growthPlot's default.
#' @keywords growth-curve
#' @importFrom methods is
#' @import ggplot2
#' @importFrom stats setNames predict
#' @importFrom viridis plasma
#' @examples
#'
#'
#' simdf <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' ss <- growthSS(
#'   model = "gam", form = y ~ time | id / group,
#'   tau = c(0.25, 0.5, 0.75), df = simdf, start = NULL, type = "nlrq"
#' )
#' fits <- fitGrowth(ss)
#' rqPlot(fits, form = ss$pcvrForm, df = ss$df, groupFill = TRUE)
#' rqPlot(fits, form = ss$pcvrForm, df = ss$df, groups = "a", timeRange = 1:10)
#'
#' ss <- growthSS(
#'   model = "gam", form = y ~ time | group,
#'   tau = c(0.5), df = simdf, start = NULL, type = "nlrq"
#' )
#' fit <- fitGrowth(ss)
#' rqPlot(fit, form = ss$pcvrForm, df = ss$df, groupFill = TRUE)
#'
#' @return Returns a ggplot showing an rq general additive model's quantiles
#' and optionally the individual growth lines.
#'
#' @export

rqPlot <- function(fit, form, df = NULL, groups = NULL, timeRange = NULL, facetGroups = TRUE,
                   groupFill = FALSE, virMaps = c("plasma")) {
  # recover outcome, predictor, and grouping information from the pcvr formula
  parsed_form <- .parsePcvrForm(form, df)
  # a non-numeric predictor with no group/individual terms means the model is
  # not longitudinal, so the static helper is used; otherwise the longitudinal one
  is_static <- !is.numeric(df[, parsed_form$x]) && !parsed_form$USEG && !parsed_form$USEID
  helper <- if (is_static) .rqStaticPlot else .rqLongitudinalPlot
  return(helper(
    fit, form, df, groups, timeRange,
    facetGroups, groupFill, virMaps, parsed_form
  ))
}
|
| 74 | ||
| 75 |
#' Plot quantile estimates from rq models for non-longitudinal (static) data.
#'
#' Builds a per-tau, per-level coefficient summary and draws point estimates
#' with +/- 2 SE error bars, labeled by tau. Called from rqPlot.
#' @keywords internal
#' @noRd

.rqStaticPlot <- function(fit, form, df, groups, timeRange,
                          facetGroups, groupFill, virMaps, parsed_form) {
  x <- parsed_form$x
  df <- parsed_form$data

  # standardize to a list of rq fits (one element per tau)
  if (methods::is(fit, "rq")) {
    fit <- list(fit)
  }

  # rq coefficients are offsets from the first factor level, so cumsum()
  # converts them into absolute per-level estimates
  summary_df <- do.call(rbind, lapply(fit, function(model) {
    iter_df <- as.data.frame(coef(summary(model)))
    colnames(iter_df) <- c("est", "err", "t", "p")
    iter_df[[x]] <- rownames(iter_df)
    iter_df[1, x] <- paste0(x, unique(df[[x]])[1])
    iter_df[["est"]] <- cumsum(iter_df[["est"]])
    iter_df$tau <- model$tau
    iter_df
  }))

  #* `filter by groups if groups != NULL`
  if (!is.null(groups)) {
    summary_df <- summary_df[summary_df[[x]] %in% groups, ]
  }
  #* `facetGroups`
  facet_layer <- NULL
  if (facetGroups) {
    facet_layer <- ggplot2::facet_wrap(stats::as.formula(paste0("~", x)),
      scales = "free_x"
    )
  }
  #* `groupFill`
  # one palette per level of x; palettes are mirrored viridis ramps so the
  # middle tau gets the most extreme color
  n_taus <- length(unique(summary_df$tau))
  if (groupFill) {
    virList <- lapply(rep(virMaps, length.out = length(unique(summary_df[[x]]))), function(pal) {
      virpal_p1 <- viridis::viridis(ceiling(n_taus / 2),
        direction = 1, end = 1, option = pal
      )
      virpal_p2 <- viridis::viridis(ceiling(n_taus / 2),
        direction = -1, end = 1, option = pal
      )[-1]
      c(virpal_p1, virpal_p2)
    })
  } else {
    # single plasma palette recycled across all groups
    virpal_p1 <- viridis::plasma(ceiling(n_taus / 2), direction = 1, end = 1)
    virpal_p2 <- viridis::plasma(ceiling(n_taus / 2), direction = -1, end = 1)[-1]
    virpal <- c(virpal_p1, virpal_p2)
    virList <- lapply(seq_along(unique(summary_df[[x]])), function(i) {
      virpal
    })
  }
  #* `plot`
  plot <- ggplot(summary_df, ggplot2::aes(group = interaction(.data[[x]]))) +
    facet_layer +
    labs(x = x, y = as.character(form)[2]) +
    pcv_theme()

  # layer each group/tau combination separately so each takes its own color
  for (g in seq_along(unique(summary_df[[x]]))) {
    iteration_group <- unique(summary_df[[x]])[g]
    sub <- summary_df[summary_df[[x]] == iteration_group, ]
    for (i in seq_along(unique(sub$tau))) {
      inner_sub <- sub[sub$tau == unique(sub$tau)[i], ]
      plot <- plot +
        ggplot2::geom_errorbar(data = inner_sub, ggplot2::aes(
          x = .data[[x]],
          ymin = .data[["est"]] - 2 * .data[["err"]],
          ymax = .data[["est"]] + 2 * .data[["err"]]
        ), width = 0.15, color = virList[[g]][i]) +
        ggplot2::geom_point(
          data = inner_sub, ggplot2::aes(
            x = .data[[x]],
            y = .data[["est"]]
          ),
          color = virList[[g]][i], size = 4
        ) +
        ggplot2::geom_text(
          data = inner_sub, ggplot2::aes(
            x = .data[[x]],
            y = .data[["est"]],
            label = .data[["tau"]]
          ),
          size = 2, color = "white"
        )
    }
  }
  # (a dead standalone `plot` expression that preceded this return was removed)
  return(plot)
}
|
| 165 | ||
| 166 |
#' Plot rq/nlrq quantile predictions over time for longitudinal models.
#'
#' Predicts each tau's quantile curve (optionally over a supplied timeRange),
#' overlays observed per-individual growth lines when the formula included an
#' individual term, and colors/facets by group. Called from rqPlot.
#' @keywords internal
#' @noRd

.rqLongitudinalPlot <- function(fit, form, df, groups, timeRange,
                                facetGroups, groupFill, virMaps, parsed_form) {
  y <- parsed_form$y
  x <- parsed_form$x
  individual <- parsed_form$individual
  if (individual == "dummyIndividual") {
    # no individual term in the formula -> skip observed growth lines
    individual <- NULL
  }
  group <- parsed_form$group
  df <- parsed_form$data
  #* `filter by groups if groups != NULL`
  if (!is.null(groups)) {
    df <- df[df[[group]] %in% groups, ]
  }
  #* `make new data if timerange is not NULL`
  if (!is.null(timeRange)) {
    # prediction grid: every group crossed with the requested times
    new_data <- do.call(rbind, lapply(unique(df[[group]]), function(g) {
      stats::setNames(data.frame(g, timeRange), c(group, x))
    }))
    # restrict observed data to the same window
    df <- df[df[[x]] >= min(timeRange) & df[[x]] <= max(timeRange), ]
  } else {
    new_data <- df
  }
  #* `standardize fit class`
  if (methods::is(fit, "rq")) {
    # capture tau before wrapping: after `fit <- list(fit)` the expression
    # `fit$tau` would be NULL, leaving the list unnamed (previous bug)
    tau <- fit$tau
    fit <- list(fit)
    names(fit) <- tau
  }
  #* `add predictions and record taus`
  preds <- do.call(cbind, lapply(fit, function(f) {
    tau <- f$tau
    stats::setNames(data.frame(stats::predict(f, newdata = new_data)), paste0("Q_", tau))
  }))
  predCols <- colnames(preds)
  # deduplicate predictions so each group's curve is drawn once
  keep <- which(!duplicated(preds))
  # drop = FALSE keeps preds a data.frame when there is only a single tau
  plotdf <- cbind(df[keep, ], preds[keep, , drop = FALSE])
  colnames(plotdf) <- c(colnames(df), colnames(preds))
  #* `layer for individual lines if formula was complete`
  individual_lines <- list()
  if (!is.null(individual)) {
    individual_lines <- ggplot2::geom_line(
      data = df, ggplot2::aes(
        x = .data[[x]], y = .data[[y]],
        group = interaction(
          .data[[individual]],
          .data[[group]]
        )
      ),
      linewidth = 0.25, color = "gray40"
    )
  }
  #* `facetGroups`
  facet_layer <- NULL
  if (facetGroups) {
    facet_layer <- ggplot2::facet_wrap(stats::as.formula(paste0("~", group)))
  }
  #* `groupFill`
  # one palette per group; mirrored viridis ramps so the middle tau is brightest
  if (groupFill) {
    virList <- lapply(rep(virMaps, length.out = length(unique(df[[group]]))), function(pal) {
      virpal_p1 <- viridis::viridis(ceiling(length(predCols) / 2), direction = 1, end = 1, option = pal)
      virpal_p2 <- viridis::viridis(ceiling(length(predCols) / 2),
        direction = -1, end = 1, option = pal
      )[-1]
      c(virpal_p1, virpal_p2)
    })
  } else {
    virpal_p1 <- viridis::plasma(ceiling(length(predCols) / 2), direction = 1, end = 1)
    virpal_p2 <- viridis::plasma(ceiling(length(predCols) / 2), direction = -1, end = 1)[-1]
    virpal <- c(virpal_p1, virpal_p2)
    virList <- lapply(seq_along(unique(df[[group]])), function(i) {
      virpal
    })
  }
  #* `plot`
  plot <- ggplot(plotdf, ggplot2::aes(group = interaction(.data[[group]]))) +
    facet_layer +
    individual_lines +
    labs(x = x, y = as.character(form)[2]) +
    pcv_theme()

  # draw each group's quantile curves with that group's palette
  for (g in seq_along(unique(plotdf[[group]]))) {
    iteration_group <- unique(plotdf[[group]])[g]
    sub <- plotdf[plotdf[[group]] == iteration_group, ]
    plot <- plot +
      lapply(seq_along(predCols), function(i) {
        ggplot2::geom_line(
          data = sub, ggplot2::aes(x = .data[[x]], y = .data[[predCols[i]]]),
          color = virList[[g]][i], linewidth = 0.7
        )
      })
  }
  return(plot)
}
| 1 |
#' Ease of use nlrq/nls starter function for standard growth model parameterizations
#'
#' Internal to growthSS
#'
#' @param model growth model name, optionally prefixed with "decay".
#' @param tau quantile(s) for nlrq models; only stored when type == "nlrq".
#' @param pars parameters to vary by group; passed through to the form helper.
#' @param start optional user-supplied starting values; when NULL self-starters
#' are used (except for double sigmoid models, which have no self-starter).
#' @param int logical, should an intercept ("I") term be added?
#' @examples
#'
#' simdf <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#'
#' ss <- .nlrqSS(
#'   model = "logistic", form = y ~ time | id / group,
#'   tau = 0.5, df = simdf, start = NULL
#' )
#' model <- "logistic"
#' form <- y ~ time | id / group
#' tau <- 0.5
#' df <- simdf
#' start <- NULL
#' pars <- NULL
#' ss <- .nlrqSS(model = "gam", form = y ~ time | id / group, df = simdf, start = NULL, tau = 0.5)
#'
#' dim(ss$df)
#' ss[c("formula", "taus", "start", "pcvrForm")]
#' @importFrom splines bs
#' @importFrom stats sortedXyData
#' @keywords internal
#' @noRd

.nlrqSS <- function(model, form, tau = 0.5, df, pars = NULL, start = NULL, type = "nlrq", int = FALSE) {
  #* ***** `Define choices and make empty output list`
  out <- list()
  # supported growth model names; match.arg below allows partial matches
  models <- c(
    "logistic", "gompertz", "monomolecular", "exponential", "linear", "power law",
    "double logistic", "double gompertz", "gam", "frechet", "weibull", "gumbel", "logarithmic",
    "bragg", "lorentz", "beta"
  )
  #* ***** `Make nlrq formula` *****
  #* `parse form argument`
  parsed_form <- .parsePcvrForm(form, df)
  y <- parsed_form$y
  x <- parsed_form$x
  group <- parsed_form$group
  USEGROUP <- parsed_form$USEG
  # nlrq/nls models have no per-individual terms, so warn that it is ignored
  if (parsed_form$USEID) {
    message(paste0("Individual is not used with type = '", type, "'."))
  }
  df <- parsed_form$data
  if (USEGROUP) {
    # factor-encode the group and keep its integer codes for indexed formulas
    df[[group]] <- factor(df[[group]])
    df[[paste0(group, "_numericLabel")]] <- unclass(df[[group]])
  }
  #* `assemble growth formula`
  # "decay" prefix flags a decreasing version of the base model
  if (grepl("decay", model)) {
    decay <- TRUE
    model <- trimws(gsub("decay", "", model))
  } else {
    decay <- FALSE
  }

  matched_model <- match.arg(model, models)
  # dispatch to the per-model formula builder, e.g. .nlrq_form_logistic
  stringFormFun <- paste0(".nlrq_form_", gsub(" ", "", matched_model))
  form_fun <- match.fun(stringFormFun)
  res <- form_fun(x, y, USEGROUP, group, pars, int)
  growthForm <- res[[1]]
  pars <- res[[2]]

  if (decay) {
    growthForm <- .nlrqDecay(growthForm)
  }

  # gam models have no nonlinear parameters to initialize
  if (matched_model == "gam") {
    start <- 0
  }

  if (is.null(start)) {
    if (grepl("double", matched_model)) {
      warning(paste0(
        "Double Sigmoid models are not supported as self-starting models,",
        " you will need to add starting parameters.",
        " Note for these models type='brms' is recommended."
      ))
      startingValues <- NULL
    } else {
      # dispatch to the per-model self-starter, e.g. .initlogistic
      stringInitFun <- paste0(".init", gsub(" ", "", matched_model))
      initFunction <- match.fun(stringInitFun)
      startingValues <- initFunction(df, x, y, int)
    }
    if ((!matched_model %in% c("double logistic", "double gompertz")) && USEGROUP) {
      # replicate starting values across groups for parameters that vary by group
      nms <- names(startingValues)
      startingValuesList <- lapply(names(startingValues), function(nm) {
        val <- startingValues[nm]
        if (nm %in% pars) {
          rep(val, length(unique(df[[group]])))
          # if this is one of pars then make starting value per group
        } else {
          val
        } # else return one starting value
      })
      names(startingValuesList) <- nms
    } else { # non-grouped, just make it into a list
      startingValuesList <- as.list(startingValues)
    }
  } else {
    # user-supplied starting values pass through unchanged
    startingValuesList <- start
  }
  out[["formula"]] <- growthForm
  if (type == "nlrq") {
    out[["taus"]] <- tau
  }
  out[["start"]] <- startingValuesList
  out[["df"]] <- df
  out[["pcvrForm"]] <- form
  return(out)
}
|
| 117 | ||
| 118 |
#' example of using selfStart
#' `logistic self starter`
#'
#' Estimates starting values (A, B, C) for the three-parameter logistic model
#' A / (1 + exp((B - x) / C)) via a logit-linearization followed by a
#' partially-linear nls refinement.
#' @examples
#' simdf <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' .initlogistic(simdf, "time", "y")
#'
#' @keywords internal
#' @noRd

.initlogistic <- function(df, x, y, int = FALSE) {
  # shift the outcome down by its minimum when an intercept term is requested
  if (int) {
    obs_min <- min(df[[y]], na.rm = TRUE)
    df[[y]] <- df[[y]] - obs_min
  }
  sorted_xy <- stats::sortedXyData(df[[x]], df[[y]])
  if (nrow(sorted_xy) < 4) {
    stop("too few distinct input values to fit a logistic model")
  }
  scaled <- abs(sorted_xy[["y"]])
  ## transform to proportion, i.e. in (0,1) :
  value_range <- range(scaled)
  spread <- diff(value_range)
  scaled <- (scaled - value_range[1L] + 0.05 * spread) / (1.1 * spread)
  sorted_xy[["z"]] <- log(scaled / (1 - scaled)) # logit transformation
  # regressing x on logit(y) gives rough midpoint (B) and scale (C) estimates
  lin_coefs <- stats::coef(stats::lm(x ~ z, sorted_xy))
  nls_coefs <- stats::coef(stats::nls(y ~ 1 / (1 + exp((B - x) / C)),
    data = sorted_xy,
    start = list(B = lin_coefs[[1L]], C = lin_coefs[[2L]]),
    algorithm = "plinear", control = stats::nls.control(warnOnly = TRUE)
  ))
  # ".lin" is the partially-linear coefficient, i.e. the asymptote A
  start <- stats::setNames(nls_coefs[c(".lin", "B", "C")], c("A", "B", "C"))
  if (int) {
    start <- stats::setNames(append(obs_min, start), c("I", "A", "B", "C"))
  }
  return(start)
}
|
| 157 | ||
| 158 | ||
| 159 |
#' `Gompertz self starter`
#'
#' Estimates starting values (A, B, C) for the Gompertz model A * exp(-B * C^x)
#' using an asymptotic regression on the log scale followed by a
#' partially-linear nls refinement.
#'
#' .initgompertz(simdf, "time", "y")
#' @keywords internal
#' @noRd

.initgompertz <- function(df, x, y, int) {
  # shift the outcome down by its minimum when an intercept term is requested
  if (int) {
    obs_min <- min(df[[y]], na.rm = TRUE)
    df[[y]] <- df[[y]] - obs_min
  }
  sorted_xy <- stats::sortedXyData(df[[x]], df[[y]])
  if (nrow(sorted_xy) < 4) {
    stop("too few distinct input values to fit the Gompertz model")
  }
  # asymptotic regression on the log scale provides rough B and C estimates
  log_xy <- sorted_xy
  log_xy$y <- log(abs(log_xy$y))
  asym_coefs <- stats::NLSstAsymptotic(log_xy)
  nls_coefs <- stats::coef(stats::nls(y ~ exp(-B * C^x),
    data = sorted_xy, start = c(
      B = asym_coefs[["b1"]],
      C = exp(-exp(asym_coefs[["lrc"]]))
    ),
    algorithm = "plinear", control = stats::nls.control(warnOnly = TRUE)
  ))
  # ".lin" is the partially-linear coefficient, i.e. the asymptote A
  start <- stats::setNames(nls_coefs[c(".lin", "B", "C")], c("A", "B", "C"))
  if (int) {
    start <- stats::setNames(append(obs_min, start), c("I", "A", "B", "C"))
  }
  return(start)
}
|
| 190 | ||
| 191 | ||
| 192 |
#' `Monomolecular self starter`
#'
#' Estimates starting values (A, B) via a (0,1) rescaling of the outcome, a
#' linear fit for the rate, and a partially-linear nls refinement.
#' ex<-growthSim("monomolecular", n=20, t=25,
#' params = list("A"=c(200,160), "B"=c(0.08, 0.1)))
#' .initmonomolecular(ex, "time", "y")
#' @keywords internal
#' @noRd

.initmonomolecular <- function(df, x, y, int) {
  # shift the outcome down by its minimum when an intercept term is requested
  if (int) {
    obs_min <- min(df[[y]], na.rm = TRUE)
    df[[y]] <- df[[y]] - obs_min
  }
  sorted_xy <- stats::sortedXyData(df[[x]], df[[y]])
  if (nrow(sorted_xy) < 4) {
    stop("too few distinct input values to fit a monomolecular model")
  }
  prop <- abs(sorted_xy[["y"]])
  ## transform to proportion, i.e. in (0,1) :
  value_range <- range(prop)
  spread <- diff(value_range)
  sorted_xy[["z"]] <- (prop - value_range[1L] + 0.05 * spread) / (1.1 * spread)
  # slope of the rescaled outcome against x seeds the rate parameter B
  lin_coefs <- stats::coef(stats::lm(z ~ x, sorted_xy))
  nls_coefs <- stats::coef(stats::nls(y ~ 1 * exp(B * x),
    data = sorted_xy, start = list(B = lin_coefs[2L]),
    algorithm = "plinear", control = stats::nls.control(warnOnly = TRUE)
  ))
  # ".lin" is the partially-linear coefficient, i.e. the scale A
  start <- stats::setNames(nls_coefs[c(".lin", "B")], c("A", "B"))
  if (int) {
    start <- stats::setNames(append(obs_min, start), c("I", "A", "B"))
  }
  return(start)
}
|
| 225 | ||
| 226 |
#' `Linear self starter`
#'
#' Estimates a starting slope (A) for the linear model A * x; with int = TRUE
#' the observed minimum is prepended as an intercept term (I).
#' @examples
#' ex <- growthSim("linear",
#'   n = 20, t = 25,
#'   params = list("A" = c(1.1, 0.95))
#' )
#' .initlinear(ex, "time", "y")
#' @keywords internal
#' @noRd

.initlinear <- function(df, x, y, int) {
  # shift the outcome down by its minimum when an intercept term is requested
  if (int) {
    obs_min <- min(df[[y]], na.rm = TRUE)
    df[[y]] <- df[[y]] - obs_min
  }
  sorted_xy <- stats::sortedXyData(df[[x]], df[[y]])
  if (nrow(sorted_xy) < 2) {
    stop("too few distinct input values to fit a linear model")
  }
  # slope of the simple linear regression becomes the "A" starting value
  lm_coefs <- stats::coef(stats::lm(y ~ x, sorted_xy))
  start <- stats::setNames(lm_coefs[c("x")], c("A"))
  if (int) {
    start <- stats::setNames(append(obs_min, start), c("I", "A"))
  }
  return(start)
}
|
| 252 | ||
| 253 |
#' `Logarithmic self starter`
#'
#' Estimates a starting coefficient (A) for the logarithmic model A * log(x)
#' by regressing the outcome on log(x); with int = TRUE the observed minimum
#' is prepended as an intercept term (I).
#' @examples
#' ex <- growthSim("logarithmic",
#'   n = 20, t = 25,
#'   params = list("A" = c(1.1, 0.95))
#' )
#' .initlogarithmic(ex, "time", "y")
#' @keywords internal
#' @noRd

.initlogarithmic <- function(df, x, y, int) {
  # shift the outcome down by its minimum when an intercept term is requested
  if (int) {
    obs_min <- min(df[[y]], na.rm = TRUE)
    df[[y]] <- df[[y]] - obs_min
  }
  xy <- stats::sortedXyData(df[[x]], df[[y]])
  if (nrow(xy) < 2) {
    stop("too few distinct input values to fit a logarithmic model")
  }
  # the log(x) coefficient of the linearized model becomes "A"
  pars <- stats::coef(stats::lm(y ~ log(x), xy))
  start <- stats::setNames(pars[c("log(x)")], c("A"))
  if (int) {
    start <- stats::setNames(append(obs_min, start), c("I", "A"))
  }
  return(start)
}
|
| 279 | ||
| 280 | ||
| 281 |
#' `power law self starter`
#'
#' Estimates starting values (A, B) for the power law model A * x^B using a
#' linear fit for the exponent seed and a partially-linear nls refinement.
#' @examples
#' ex <- growthSim("power law",
#'   n = 20, t = 25,
#'   params = list("A" = c(16, 11), "B" = c(0.75, 0.7))
#' )
#' .initpowerlaw(ex, "time", "y")
#' @keywords internal
#' @noRd

.initpowerlaw <- function(df, x, y, int) {
  # shift the outcome down by its minimum when an intercept term is requested
  if (int) {
    obs_min <- min(df[[y]], na.rm = TRUE)
    df[[y]] <- df[[y]] - obs_min
  }
  xy <- stats::sortedXyData(df[[x]], df[[y]])
  if (nrow(xy) < 3) {
    stop("too few distinct input values to fit a power law model")
  }
  # linear slope seeds the exponent B for the nls refinement
  aux <- stats::coef(stats::lm(y ~ x, xy))

  pars <- stats::coef(stats::nls(y ~ 1 * x^B,
    data = xy, start = list(B = aux[2L]),
    algorithm = "plinear", control = stats::nls.control(warnOnly = TRUE)
  ))
  # ".lin" is the partially-linear coefficient, i.e. the scale A
  start <- stats::setNames(pars[c(".lin", "B")], c("A", "B"))
  if (int) {
    start <- stats::setNames(append(obs_min, start), c("I", "A", "B"))
  }
  return(start)
}
|
| 312 | ||
| 313 | ||
| 314 |
#' `exponential self starter`
#'
#' Estimates starting values (A, B) for the exponential model A * exp(B * x)
#' using a linear fit for the rate seed and a partially-linear nls refinement.
#' @examples
#' ex <- growthSim("exponential",
#'   n = 20, t = 25,
#'   params = list("A" = c(15, 20), "B" = c(0.095, 0.095))
#' )
#' .initexponential(ex, "time", "y")
#' @keywords internal
#' @noRd

.initexponential <- function(df, x, y, int) {
  # shift the outcome down by its minimum when an intercept term is requested
  if (int) {
    obs_min <- min(df[[y]], na.rm = TRUE)
    df[[y]] <- df[[y]] - obs_min
  }
  xy <- stats::sortedXyData(df[[x]], df[[y]])
  if (nrow(xy) < 3) {
    # message grammar fixed: "a exponential" -> "an exponential"
    stop("too few distinct input values to fit an exponential model")
  }
  # linear slope seeds the rate B for the nls refinement
  aux <- stats::coef(stats::lm(y ~ x, xy))

  pars <- stats::coef(stats::nls(y ~ 1 * exp(B * x),
    data = xy, start = list(B = aux[2L]),
    algorithm = "plinear", control = stats::nls.control(warnOnly = TRUE)
  ))
  # ".lin" is the partially-linear coefficient, i.e. the scale A
  start <- stats::setNames(pars[c(".lin", "B")], c("A", "B"))
  if (int) {
    start <- stats::setNames(append(obs_min, start), c("I", "A", "B"))
  }
  return(start)
}
|
| 345 | ||
| 346 |
#' `Extreme Value Distribution Self Starter (weibull, frechet, gumbel)`
#'
#' Estimates starting values (A, B, C) from the right asymptote and a
#' double-log linearization of the scaled response; shared by the weibull,
#' frechet, and gumbel growth models.
#' @keywords internal
#' @noRd

.initweibull <- function(df, x, y, int) {
  # shift the outcome down by its minimum when an intercept term is requested
  if (int) {
    obs_min <- min(df[[y]], na.rm = TRUE)
    df[[y]] <- df[[y]] - obs_min
  }
  sorted_xy <- stats::sortedXyData(df[[x]], df[[y]])
  if (nrow(sorted_xy) < 5) {
    stop("too few distinct input values to fit the EVD growth model")
  }
  right_asym <- stats::NLSstRtAsymptote(sorted_xy)
  # double-log linearization against log(x); only positive x contribute
  lin_coefs <- stats::coef(stats::lm(
    log(-log((right_asym - y) / (right_asym - stats::NLSstLfAsymptote(sorted_xy)))) ~ log(x),
    data = sorted_xy, subset = x > 0
  ))
  start <- stats::setNames(c(right_asym, exp(lin_coefs) + c(1, 0)), c("A", "B", "C"))
  if (int) {
    start <- stats::setNames(append(obs_min, start), c("I", "A", "B", "C"))
  }
  return(start)
}
.initfrechet <- .initweibull
.initgumbel <- .initweibull
|
| 372 | ||
| 373 |
#' `bragg DRC self starter`
#'
#' Estimates starting values (B, A, C) for the Bragg dose-response model
#' A * exp(-B * (x - C)^2): amplitude from the maximum, center from its
#' position, and precision from a log-linearized regression.
#' @examples
#' ex <- growthSim("bragg",
#'   n = 20, t = 25,
#'   params = list("A" = c(10, 15), "B" = c(0.01, 0.02), "C" = c(50, 60))
#' )
#' .initbragg(ex, "time", "y")
#' @keywords internal
#' @noRd

.initbragg <- function(df, x, y, int) {
  # shift the outcome down by its minimum when an intercept term is requested
  if (int) {
    obs_min <- min(df[[y]], na.rm = TRUE)
    df[[y]] <- df[[y]] - obs_min
  }
  xy <- stats::sortedXyData(df[[x]], df[[y]])
  x <- xy[, "x"]
  y <- xy[, "y"]
  A <- max(y) # amplitude, conventionally d
  C <- x[which.max(y)] # position of midpoint, conventionally e
  # linearize log(y / A) = -B * (x - C)^2; the 1e-04 offset guards log(0)
  pseudoY <- log((y + 1e-04) / A)
  pseudoX <- (x - C)^2
  dat <- data.frame(pseudoY = pseudoY, pseudoX = pseudoX)
  # stats:: qualified for consistency with the other self-starters in this file
  coefs <- stats::coef(stats::lm(pseudoY ~ pseudoX - 1, data = dat))
  B <- -coefs[1] # pseudo precision/slope at inflection, conventionally b
  start <- stats::setNames(c(B, A, C), c("B", "A", "C"))
  if (int) {
    start <- stats::setNames(append(obs_min, start), c("I", "B", "A", "C"))
  }
  return(start)
}
|
| 404 | ||
| 405 |
#' `lorentz DRC self starter` |
|
| 406 |
#' @examples |
|
| 407 |
#' ex <- growthSim("lorentz",
|
|
| 408 |
#' n = 20, t = 25, |
|
| 409 |
#' params = list("A" = c(10, 15), "B" = c(0.01, 0.02), "C" = c(50, 60))
|
|
| 410 |
#' ) |
|
| 411 |
#' .initlorentz(ex, "time", "y") |
|
| 412 |
#' @keywords internal |
|
| 413 |
#' @noRd |
|
| 414 | ||
| 415 |
.initlorentz <- function(df, x, y, int) {
  # Self-starting routine for the lorentz dose-response model
  # y = A / (1 + B * (x - C)^2). Returns starts named c("B", "A", "C"),
  # prefixed with "I" when int = TRUE.
  if (int) {
    # Shift the response to zero; the minimum becomes the "I" start value.
    obs_min <- min(df[[y]], na.rm = TRUE)
    df[[y]] <- df[[y]] - obs_min
  }
  xy <- stats::sortedXyData(df[[x]], df[[y]])
  x <- xy[, "x"]
  y <- xy[, "y"]
  A <- max(y) # amplitude, conventionally d
  C <- x[which.max(y)] # position of midpoint, conventionally e
  # Linearize: (A - y) / y = B * (x - C)^2, so a no-intercept regression
  # gives B directly.
  pseudoY <- (A - y) / y
  pseudoX <- (x - C)^2
  dat <- data.frame(pseudoY = pseudoY, pseudoX = pseudoX)
  # Consistency fix: qualify coef/lm with stats:: like the sibling
  # self-starters (previously unqualified).
  coefs <- stats::coef(stats::lm(pseudoY ~ pseudoX - 1, data = dat))
  B <- coefs[1] # pseudo precision/slope at inflection, conventionally b
  start <- stats::setNames(c(B, A, C), c("B", "A", "C"))
  if (int) {
    start <- stats::setNames(append(obs_min, start), c("I", "B", "A", "C"))
  }
  return(start)
}
|
| 436 | ||
| 437 |
#' `beta DRC self starter` |
|
| 438 |
#' @examples |
|
| 439 |
#' ex <- growthSim("beta",
|
|
| 440 |
#' n = 20, t = 25, |
|
| 441 |
#' params = list("A" = 10, "B" = 1.2, "C" = 15, "D" = 8, "E" = 19)
|
|
| 442 |
#' ) |
|
| 443 |
#' .initbeta(ex, "time", "y") |
|
| 444 |
#' @keywords internal |
|
| 445 |
#' @noRd |
|
| 446 | ||
| 447 |
.initbeta <- function(df, x, y, int) {
  # Self-starting routine for the beta growth model. Returns starts named
  # c("A", "B", "C", "D", "E") (B fixed at 1), prefixed with "I" when
  # int = TRUE. D/E are the first/last x where the response is nonzero,
  # interpolated halfway to the neighboring x when possible.
  if (int) {
    # Shift the response to zero; the minimum becomes the "I" start value.
    obs_min <- min(df[[y]], na.rm = TRUE)
    df[[y]] <- df[[y]] - obs_min
  }
  xy <- stats::sortedXyData(df[[x]], df[[y]])
  x <- xy[, "x"]
  y <- xy[, "y"]
  A <- max(y) # amplitude
  C <- x[which.max(y)] # position of the maximum
  # Onset (D): midpoint between the first nonzero response and the x before
  # it; falls back to the first x when the response starts nonzero.
  # Idiom fix: scalar conditions use if/else instead of ifelse()
  # (same result; ifelse is for vectors and strips attributes).
  firstidx <- min(which(y != 0))
  D <- if (firstidx == 1) {
    x[1]
  } else {
    (x[firstidx] + x[(firstidx - 1)]) / 2
  }
  # Offset (E): mirrored logic at the tail of the series.
  secidx <- max(which(y != 0))
  E <- if (secidx == length(y)) {
    x[length(x)]
  } else {
    (x[secidx] + x[(secidx + 1)]) / 2
  }
  start <- stats::setNames(c(A, 1, C, D, E), c("A", "B", "C", "D", "E"))
  if (int) {
    start <- stats::setNames(append(obs_min, start), c("I", "A", "B", "C", "D", "E"))
  }
  return(start)
}
|
| 473 | ||
| 474 | ||
| 475 |
#' `Define growth formulas` |
|
| 476 |
#' @keywords internal |
|
| 477 |
#' @noRd |
|
| 478 | ||
| 479 |
.nlrq_form_logistic <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style logistic growth formula. Parameters listed in `pars`
  # are indexed by `group` (e.g. A[group]) when USEGROUP is TRUE; the rest
  # are shared. Returns list(formula = <formula>, pars = <character>).
  # Fix: the intercept string previously read " ~I[] + " (missing space
  # after "~"), inconsistent with every sibling builder; the parsed formula
  # is unchanged.
  total_pars <- if (int) c("I", "A", "B", "C") else c("A", "B", "C")
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(y, " ~ I[] + (A[]/(1+exp((B[]-", x, ")/C[])))")
  } else {
    paste0(y, " ~ A[]/(1+exp((B[]-", x, ")/C[]))")
  }
  if (USEGROUP) {
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    # No grouping: strip the parameter-index brackets entirely.
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
|
| 507 | ||
| 508 |
.nlrq_form_gompertz <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style gompertz growth formula; parameters in `pars` are
  # indexed by `group` when USEGROUP is TRUE, others are shared.
  # Returns list(formula = <formula>, pars = <character>).
  total_pars <- if (int) c("I", "A", "B", "C") else c("A", "B", "C")
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(y, " ~ I[] + (A[]*exp(-B[]*exp(-C[]*", x, ")))")
  } else {
    paste0(y, " ~ A[]*exp(-B[]*exp(-C[]*", x, "))")
  }
  if (USEGROUP) {
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
|
| 536 | ||
| 537 |
.nlrq_form_doublelogistic <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style double-logistic formula (two stacked logistic phases);
  # parameters in `pars` are indexed by `group` when USEGROUP is TRUE.
  # Returns list(formula = <formula>, pars = <character>).
  total_pars <- if (int) {
    c("I", "A", "B", "C", "A2", "B2", "C2")
  } else {
    c("A", "B", "C", "A2", "B2", "C2")
  }
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(
      y, " ~ I[] + (A[]/(1+exp((B[]-", x, ")/C[]))",
      " + ((A2[]-A[]) /(1+exp((B2[]-", x, ")/C2[]))))"
    )
  } else {
    paste0(
      y, " ~ A[]/(1+exp((B[]-", x, ")/C[]))",
      " + ((A2[]-A[]) /(1+exp((B2[]-", x, ")/C2[])))"
    )
  }
  if (USEGROUP) {
    # "A\\[\\]" cannot match "A2[]", so replacement order is safe.
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
|
| 571 | ||
| 572 |
.nlrq_form_doublegompertz <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style double-gompertz formula (two stacked gompertz phases);
  # parameters in `pars` are indexed by `group` when USEGROUP is TRUE.
  # Returns list(formula = <formula>, pars = <character>).
  total_pars <- if (int) {
    c("I", "A", "B", "C", "A2", "B2", "C2")
  } else {
    c("A", "B", "C", "A2", "B2", "C2")
  }
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(
      y, " ~ I[] + (A[] * exp(-B[] * exp(-C[]*", x, "))",
      " + (A2[]-A[]) * exp(-B2[] * exp(-C2[]*(", x, "-B[]))))"
    )
  } else {
    paste0(
      y, " ~ A[] * exp(-B[] * exp(-C[]*", x, "))",
      " + (A2[]-A[]) * exp(-B2[] * exp(-C2[]*(", x, "-B[])))"
    )
  }
  if (USEGROUP) {
    # "A\\[\\]" cannot match "A2[]", so replacement order is safe.
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
|
| 606 | ||
| 607 |
.nlrq_form_monomolecular <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style monomolecular formula; parameters in `pars` are
  # indexed by `group` when USEGROUP is TRUE, others are shared.
  # Returns list(formula = <formula>, pars = <character>).
  total_pars <- if (int) c("I", "A", "B") else c("A", "B")
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(y, "~I[] + (A[]-A[]*exp(-B[]*", x, "))")
  } else {
    paste0(y, "~A[]-A[]*exp(-B[]*", x, ")")
  }
  if (USEGROUP) {
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
|
| 635 | ||
| 636 |
.nlrq_form_exponential <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style exponential growth formula; parameters in `pars`
  # are indexed by `group` when USEGROUP is TRUE, others are shared.
  # Returns list(formula = <formula>, pars = <character>).
  total_pars <- if (int) c("I", "A", "B") else c("A", "B")
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(y, " ~ I[] + (A[]*exp(B[]*", x, "))")
  } else {
    paste0(y, " ~ A[]*exp(B[]*", x, ")")
  }
  if (USEGROUP) {
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
|
| 664 | ||
| 665 |
.nlrq_form_linear <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style linear formula (slope A, optional intercept I);
  # parameters in `pars` are indexed by `group` when USEGROUP is TRUE.
  # Returns list(formula = <formula>, pars = <character>).
  total_pars <- if (int) c("I", "A") else c("A")
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(y, " ~ I[] + A[]*", x)
  } else {
    paste0(y, " ~ A[]*", x)
  }
  if (USEGROUP) {
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
|
| 693 | ||
| 694 |
.nlrq_form_powerlaw <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style power-law formula; parameters in `pars` are indexed
  # by `group` when USEGROUP is TRUE, others are shared.
  # Returns list(formula = <formula>, pars = <character>).
  total_pars <- if (int) c("I", "A", "B") else c("A", "B")
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(y, " ~ I[] + (A[]*", x, "^B[])")
  } else {
    paste0(y, " ~ A[]*", x, "^B[]")
  }
  if (USEGROUP) {
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
|
| 722 | ||
| 723 |
.nlrq_form_gam <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build a spline ("gam"-style) formula: y ~ bs(x), crossed with the
  # grouping factor when USEGROUP is TRUE. There are no named growth
  # parameters, so pars is returned as NULL.
  rhs <- paste0("bs(", x, ")")
  if (USEGROUP) {
    rhs <- paste0(rhs, "*", group)
  }
  nf <- as.formula(paste0(y, " ~ ", rhs))
  return(list("formula" = nf, "pars" = NULL))
}
|
| 731 | ||
| 732 |
.nlrqDecay <- function(form) {
  # Negate the right-hand side of a growth formula to model decay:
  # y ~ f(x) becomes y ~ -(f(x)).
  parts <- as.character(form) # c("~", lhs, rhs)
  decayed <- paste0(parts[2], parts[1], "-(", parts[3], ")")
  as.formula(decayed)
}
|
| 736 | ||
| 737 |
.nlrq_form_frechet <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style frechet (EVD) formula; parameters in `pars` are
  # indexed by `group` when USEGROUP is TRUE, others are shared.
  # Returns list(formula = <formula>, pars = <character>).
  total_pars <- if (int) c("I", "A", "B", "C") else c("A", "B", "C")
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(y, " ~ I[] + (A[] * exp(-((", x, "-0)/C[])^(-B[])))")
  } else {
    paste0(y, " ~ A[] * exp(-((", x, "-0)/C[])^(-B[]))")
  }
  if (USEGROUP) {
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
|
| 765 | ||
| 766 | ||
| 767 |
.nlrq_form_weibull <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style weibull (EVD) formula; parameters in `pars` are
  # indexed by `group` when USEGROUP is TRUE, others are shared.
  # Returns list(formula = <formula>, pars = <character>).
  total_pars <- if (int) c("I", "A", "B", "C") else c("A", "B", "C")
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(y, " ~ I[] + (A[] * (1-exp(-(", x, "/C[])^B[])))")
  } else {
    paste0(y, " ~ A[] * (1-exp(-(", x, "/C[])^B[]))")
  }
  if (USEGROUP) {
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
|
| 795 | ||
| 796 |
.nlrq_form_gumbel <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style gumbel (EVD) formula; parameters in `pars` are
  # indexed by `group` when USEGROUP is TRUE, others are shared.
  # Returns list(formula = <formula>, pars = <character>).
  total_pars <- if (int) c("I", "A", "B", "C") else c("A", "B", "C")
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(y, " ~ I[] + (A[] * exp(-exp(-(", x, "-B[])/C[])))")
  } else {
    paste0(y, " ~ A[] * exp(-exp(-(", x, "-B[])/C[]))")
  }
  if (USEGROUP) {
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
|
| 824 | ||
| 825 |
.nlrq_form_logarithmic <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style logarithmic formula (scale A, optional intercept I);
  # parameters in `pars` are indexed by `group` when USEGROUP is TRUE.
  # Returns list(formula = <formula>, pars = <character>).
  total_pars <- if (int) c("I", "A") else c("A")
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(y, " ~ I[] + A[] * log(", x, ")")
  } else {
    paste0(y, " ~ A[] * log(", x, ")")
  }
  if (USEGROUP) {
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
|
| 853 | ||
| 854 |
.nlrq_form_bragg <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style bragg dose-response formula; parameters in `pars`
  # are indexed by `group` when USEGROUP is TRUE, others are shared.
  # Returns list(formula = <formula>, pars = <character>).
  total_pars <- if (int) c("I", "A", "B", "C") else c("A", "B", "C")
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(y, " ~ I[] + A[] * exp(-B[] * (", x, " - C[])^2)")
  } else {
    paste0(y, " ~ A[] * exp(-B[] * (", x, " - C[])^2)")
  }
  if (USEGROUP) {
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
|
| 882 | ||
| 883 |
.nlrq_form_lorentz <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style lorentz dose-response formula; parameters in `pars`
  # are indexed by `group` when USEGROUP is TRUE, others are shared.
  # Returns list(formula = <formula>, pars = <character>).
  total_pars <- if (int) c("I", "A", "B", "C") else c("A", "B", "C")
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(y, " ~ I[] + A[] / (1 + B[] * (", x, " - C[]) ^ 2)")
  } else {
    paste0(y, " ~ A[] / (1 + B[] * (", x, " - C[]) ^ 2)")
  }
  if (USEGROUP) {
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
|
| 911 | ||
| 912 |
.nlrq_form_beta <- function(x, y, USEGROUP, group, pars, int = FALSE) {
  # Build an nlrq-style beta growth formula; parameters in `pars` are
  # indexed by `group` when USEGROUP is TRUE, others are shared.
  # Returns list(formula = <formula>, pars = <character>).
  total_pars <- if (int) {
    c("I", "A", "B", "C", "D", "E")
  } else {
    c("A", "B", "C", "D", "E")
  }
  if (is.null(pars)) {
    pars <- total_pars
  }
  str_nf <- if (int) {
    paste0(
      y, " ~ I[] + A[] * (((", x, " - D[]) / (C[] - D[])) * ((E[] - ", x,
      ") / (E[] - C[])) ^ ((E[] - C[]) / (C[] - D[]))) ^ B[]"
    )
  } else {
    paste0(
      y, " ~ A[] * (((", x, " - D[]) / (C[] - D[])) * ((E[] - ", x,
      ") / (E[] - C[])) ^ ((E[] - C[]) / (C[] - D[]))) ^ B[]"
    )
  }
  if (USEGROUP) {
    for (p in total_pars) {
      replacement <- if (p %in% pars) paste0(p, "[", group, "]") else p
      str_nf <- gsub(paste0(p, "\\[\\]"), replacement, str_nf)
    }
    nf <- as.formula(str_nf)
  } else {
    nf <- as.formula(gsub("\\[|\\]", "", str_nf))
  }
  return(list("formula" = nf, "pars" = pars))
}
| 1 |
#' Function to visualize common \code{stats::nls} growth models.
|
|
| 2 |
#' |
|
| 3 |
#' Models fit using \link{growthSS} inputs by \link{fitGrowth}
|
|
| 4 |
#' (and similar models made through other means) can be visualized easily using this function. |
|
| 5 |
#' This will generally be called by \code{growthPlot}.
|
|
| 6 |
#' |
|
| 7 |
#' @param fit A model fit returned by \code{fitGrowth} with type="nls".
|
|
| 8 |
#' @param form A formula similar to that in \code{growthSS} inputs (or the \code{pcvrForm}
|
|
| 9 |
#' part of the output) specifying the outcome, predictor, and grouping structure of the data as |
|
| 10 |
#' \code{outcome ~ predictor|individual/group}.
|
|
| 11 |
#' If the individual and group are specified then the observed growth lines are plotted. |
|
| 12 |
#' @param groups An optional set of groups to keep in the plot. |
|
| 13 |
#' Defaults to NULL in which case all groups in the model are plotted. |
|
| 14 |
#' @param df A dataframe to use in plotting observed growth curves on top of the model. |
|
| 15 |
#' This must be supplied for nls models. |
|
| 16 |
#' @param timeRange An optional range of times to use. This can be used to view predictions for |
|
| 17 |
#' future data if the avaiable data has not reached some point (such as asymptotic size). |
|
| 18 |
#' @param facetGroups logical, should groups be separated in facets? Defaults to TRUE. |
|
| 19 |
#' @param groupFill logical, should groups have different colors? Defaults to FALSE. |
|
| 20 |
#' If TRUE then viridis colormaps are used in the order of virMaps |
|
| 21 |
#' @param virMaps order of viridis maps to use. Will be recycled to necessary length. |
|
| 22 |
#' Defaults to "plasma", but will generally be informed by growthPlot's default. |
|
| 23 |
#' @keywords growth-curve |
|
| 24 |
#' @importFrom methods is |
|
| 25 |
#' @import ggplot2 |
|
| 26 |
#' @importFrom stats predict |
|
| 27 |
#' @examples |
|
| 28 |
#' |
|
| 29 |
#' |
|
| 30 |
#' simdf <- growthSim("logistic",
|
|
| 31 |
#' n = 20, t = 25, |
|
| 32 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 33 |
#' ) |
|
| 34 |
#' ss <- growthSS( |
|
| 35 |
#' model = "logistic", form = y ~ time | id / group, |
|
| 36 |
#' df = simdf, start = NULL, type = "nls" |
|
| 37 |
#' ) |
|
| 38 |
#' fit <- fitGrowth(ss) |
|
| 39 |
#' nlsPlot(fit, form = ss$pcvrForm, df = ss$df, groupFill = TRUE) |
|
| 40 |
#' nlsPlot(fit, form = ss$pcvrForm, df = ss$df, groups = "a", timeRange = 1:10) |
|
| 41 |
#' |
|
| 42 |
#' @return Returns a ggplot showing an nls model's predictions. |
|
| 43 |
#' |
|
| 44 |
#' @export |
|
| 45 | ||
| 46 |
nlsPlot <- function(fit, form, df = NULL, groups = NULL, timeRange = NULL,
                    facetGroups = TRUE, groupFill = FALSE, virMaps = c("plasma")) {
  #* `get needed information from formula`
  parsed_form <- .parsePcvrForm(form, df)
  #* `pick longitudinal or non-longitudinal helper`
  # A non-numeric x with no grouping and no individual structure means the
  # model is treated as "static" (point estimates per level) rather than a
  # growth curve over time.
  # NOTE(review): this indexes the user-supplied `df`, not `parsed_form$data`;
  # presumably .parsePcvrForm requires a non-NULL df here — confirm df cannot
  # be NULL on this path.
  if (!is.numeric(df[, parsed_form$x]) && !parsed_form$USEG && !parsed_form$USEID) {
    p <- .nlsStaticPlot(
      fit, form, df, groups, timeRange,
      facetGroups, groupFill, virMaps, parsed_form
    )
    return(p)
  }
  # Default path: longitudinal predictions plotted over the x variable.
  p <- .nlsLongitudinalPlot(
    fit, form, df, groups, timeRange,
    facetGroups, groupFill, virMaps, parsed_form
  )
  return(p)
}
|
| 64 | ||
| 65 |
#' @keywords internal |
|
| 66 |
#' @noRd |
|
| 67 | ||
| 68 |
.nlsStaticPlot <- function(fit, form, df, groups, timeRange,
                           facetGroups, groupFill, virMaps, parsed_form) {
  # Plot a non-longitudinal nls model as per-level point estimates with
  # +/- 2 SE error bars. Internal helper for nlsPlot.
  x <- parsed_form$x
  df <- parsed_form$data
  #* `when implemented SE can be added here, see ?predict.nls`
  # Fix: this previously summarized `mod1`, which is not defined anywhere in
  # this function — the fitted model argument is `fit`. (Coverage showed this
  # branch had never executed, hiding the error.)
  summary_df <- as.data.frame(coef(summary(fit)))
  colnames(summary_df) <- c("est", "err", "t", "p")
  summary_df[[x]] <- rownames(summary_df)
  # Label the first coefficient (the reference level) with its level name.
  summary_df[1, x] <- paste0(x, unique(df[[x]])[1])
  # Coefficients after the first are contrasts against the reference level,
  # so the cumulative sum recovers per-level estimates.
  summary_df[["est"]] <- cumsum(summary_df[["est"]])
  #* `filter by groups if groups != NULL`
  if (!is.null(groups)) {
    summary_df <- summary_df[summary_df[[x]] %in% groups, ]
  }
  #* `facetGroups`
  facet_layer <- NULL
  if (facetGroups) {
    facet_layer <- ggplot2::facet_wrap(stats::as.formula(paste0("~", x)))
  }
  #* `groupFill`
  if (groupFill) {
    # One viridis color per level, drawn from the supplied palette sequence.
    virVals <- unlist(lapply(
      rep(virMaps, length.out = length(unique(summary_df[[x]]))),
      function(pal) {
        viridis::viridis(1, begin = 0.5, option = pal)
      }
    ))
    color_scale <- ggplot2::scale_color_manual(values = virVals)
  } else {
    color_scale <- ggplot2::scale_color_manual(values = rep("#CC4678FF", length(unique(df[[x]]))))
  }
  #* `plot`
  plot <- ggplot(summary_df, ggplot2::aes(group = interaction(.data[[x]]))) +
    facet_layer +
    ggplot2::geom_errorbar(ggplot2::aes(
      x = .data[[x]],
      ymin = .data[["est"]] - 2 * .data[["err"]],
      ymax = .data[["est"]] + 2 * .data[["err"]]
    ), width = 0.25) +
    ggplot2::geom_point(ggplot2::aes(x = .data[[x]], y = .data[["est"]], color = .data[[x]]),
      size = 4
    ) +
    color_scale +
    labs(x = x, y = as.character(form)[2]) +
    pcv_theme()
  return(plot)
}
|
| 115 | ||
| 116 |
#' @keywords internal |
|
| 117 |
#' @noRd |
|
| 118 | ||
| 119 |
.nlsLongitudinalPlot <- function(fit, form, df, groups, timeRange,
                                 facetGroups, groupFill, virMaps, parsed_form) {
  # Plot a longitudinal nls model: predicted growth curves per group, with
  # optional observed per-individual lines underneath. Internal helper for
  # nlsPlot.
  y <- parsed_form$y
  x <- parsed_form$x
  individual <- parsed_form$individual
  # "dummyIndividual" is the parser's sentinel for "no individual specified".
  if (individual == "dummyIndividual") {
    individual <- NULL
  }
  group <- parsed_form$group
  df <- parsed_form$data
  #* `filter by groups if groups != NULL`
  if (!is.null(groups)) {
    df <- df[df[[group]] %in% groups, ]
  }
  #* `make new data if timerange is not NULL`
  if (!is.null(timeRange)) {
    # One prediction row per group per requested time point.
    new_data <- do.call(rbind, lapply(unique(df[[group]]), function(g) {
      stats::setNames(data.frame(g, timeRange), c(group, x))
    }))
    # Restrict the observed data to the same time window.
    df <- df[df[[x]] >= min(timeRange) & df[[x]] <= max(timeRange), ]
  } else {
    new_data <- NULL
  }
  #* `add predictions`
  # Deduplicating predictions leaves one row per distinct predicted value.
  # NOTE(review): `keep` indexes both preds and df; when timeRange is given,
  # preds rows come from new_data while df was filtered separately —
  # presumably these align in practice, but confirm the row correspondence.
  preds <- data.frame(pred = stats::predict(fit, newdata = new_data))
  keep <- which(!duplicated(preds$pred))
  plotdf <- df[keep, ]
  plotdf$pred <- preds[keep, "pred"]

  #* `when implemented SE can be added here, see ?predict.nls`
  #*
  #* `layer for individual lines if formula was complete`
  individual_lines <- list()
  if (!is.null(individual)) {
    # Thin gray lines tracing each observed individual within its group.
    individual_lines <- ggplot2::geom_line(
      data = df, ggplot2::aes(
        x = .data[[x]], y = .data[[y]],
        group = interaction(
          .data[[individual]],
          .data[[group]]
        )
      ),
      linewidth = 0.25, color = "gray40"
    )
  }
  #* `facetGroups`
  facet_layer <- NULL
  if (facetGroups) {
    facet_layer <- ggplot2::facet_wrap(stats::as.formula(paste0("~", group)))
  }
  #* `groupFill`
  if (groupFill) {
    # One viridis color per group, drawn from the supplied palette sequence.
    virVals <- unlist(lapply(rep(virMaps, length.out = length(unique(df[[group]]))), function(pal) {
      viridis::viridis(1, begin = 0.5, option = pal)
    }))
    color_scale <- ggplot2::scale_color_manual(values = virVals)
  } else {
    color_scale <- ggplot2::scale_color_manual(values = rep("#CC4678FF", length(unique(df[[group]]))))
  }

  #* `plot`
  plot <- ggplot(plotdf, ggplot2::aes(group = interaction(.data[[group]]))) +
    facet_layer +
    individual_lines +
    ggplot2::geom_line(ggplot2::aes(x = .data[[x]], y = .data[["pred"]], color = .data[[group]]),
      linewidth = 0.7
    ) + # using middle of plasma pal
    color_scale +
    labs(x = x, y = as.character(form)[2]) +
    pcv_theme()

  return(plot)
}
|
| 193 | ||
| 194 |
#' @rdname nlsPlot |
|
| 195 |
#' @examples |
|
| 196 |
#' simdf <- growthSim("logistic",
|
|
| 197 |
#' n = 20, t = 25, |
|
| 198 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 199 |
#' ) |
|
| 200 |
#' ss <- growthSS( |
|
| 201 |
#' model = "gam", form = y ~ time | id / group, |
|
| 202 |
#' df = simdf, start = NULL, type = "nls" |
|
| 203 |
#' ) |
|
| 204 |
#' fit <- fitGrowth(ss) |
|
| 205 |
#' gamPlot(fit, form = ss$pcvrForm, df = ss$df, groupFill = TRUE) |
|
| 206 |
#' gamPlot(fit, form = ss$pcvrForm, df = ss$df, groups = "a", timeRange = 1:10) |
|
| 207 |
#' ss <- growthSS( |
|
| 208 |
#' model = "gam", form = y ~ time | group, |
|
| 209 |
#' df = simdf, start = NULL, type = "nls" |
|
| 210 |
#' ) |
|
| 211 |
#' fit <- fitGrowth(ss) |
|
| 212 |
#' gamPlot(fit, form = ss$pcvrForm, df = ss$df, groupFill = TRUE) |
|
| 213 |
#' |
|
| 214 |
#' @export |
|
| 215 | ||
| 216 |
gamPlot <- function(fit, form, df = NULL, groups = NULL, timeRange = NULL, facetGroups = TRUE,
                    groupFill = FALSE, virMaps = c("plasma")) {
  # Plot predictions from a gam-style model fit via fitGrowth; mirrors
  # .nlsLongitudinalPlot except for how newdata is built (see below).
  #* `get needed information from formula`
  parsed_form <- .parsePcvrForm(form, df)
  y <- parsed_form$y
  x <- parsed_form$x
  individual <- parsed_form$individual
  # "dummyIndividual" is the parser's sentinel for "no individual specified".
  if (individual == "dummyIndividual") {
    individual <- NULL
  }
  group <- parsed_form$group
  df <- parsed_form$data
  #* `filter by groups if groups != NULL`
  if (!is.null(groups)) {
    df <- df[df[[group]] %in% groups, ]
  }
  #* `make new data if timerange is not NULL`
  if (!is.null(timeRange)) {
    # One prediction row per group per requested time point.
    new_data <- do.call(rbind, lapply(unique(df[[group]]), function(g) {
      stats::setNames(data.frame(g, timeRange), c(group, x))
    }))
  } else {
    # note this is the only change between this and nlsPlot
    # this change is here because predict.nls sometimes acts strangely with the given data
    # but predict.gam does not accept a NULL input for the newdata argument.
    new_data <- df
  }
  #* `add predictions`
  # Deduplicating predictions leaves one row per distinct predicted value;
  # `keep` is then used to index both preds and df.
  preds <- data.frame(pred = stats::predict(fit, newdata = new_data))
  keep <- which(!duplicated(preds$pred))
  plotdf <- df[keep, ]
  plotdf$pred <- preds[keep, "pred"]

  #* `when implemented SE can be added here, see ?predict.nls`
  #*
  #* `layer for individual lines if formula was complete`
  individual_lines <- list()
  if (!is.null(individual)) {
    # Thin gray lines tracing each observed individual within its group.
    individual_lines <- ggplot2::geom_line(
      data = df, ggplot2::aes(
        x = .data[[x]], y = .data[[y]],
        group = interaction(
          .data[[individual]],
          .data[[group]]
        )
      ),
      linewidth = 0.25, color = "gray40"
    )
  }
  #* `facetGroups`
  facet_layer <- NULL
  if (facetGroups) {
    facet_layer <- ggplot2::facet_wrap(stats::as.formula(paste0("~", group)))
  }
  #* `groupFill`
  if (groupFill) {
    # One viridis color per group, drawn from the supplied palette sequence.
    virVals <- unlist(lapply(rep(virMaps, length.out = length(unique(df[[group]]))), function(pal) {
      viridis::viridis(1, begin = 0.5, option = pal)
    }))
    color_scale <- ggplot2::scale_color_manual(values = virVals)
  } else {
    color_scale <- ggplot2::scale_color_manual(values = rep("#CC4678FF", length(unique(df[[group]]))))
  }

  #* `plot`
  plot <- ggplot(plotdf, ggplot2::aes(group = interaction(.data[[group]]))) +
    facet_layer +
    individual_lines +
    ggplot2::geom_line(ggplot2::aes(x = .data[[x]], y = .data[["pred"]], color = .data[[group]]),
      linewidth = 0.7
    ) + # using middle of plasma pal
    color_scale +
    labs(x = x, y = as.character(form)[2]) +
    pcv_theme()

  return(plot)
}
|
| 294 | ||
| 295 |
#' @rdname nlsPlot |
|
| 296 |
#' @examples |
|
| 297 |
#' simdf <- growthSim("logistic",
|
|
| 298 |
#' n = 20, t = 25, |
|
| 299 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 300 |
#' ) |
|
| 301 |
#' ss <- growthSS( |
|
| 302 |
#' model = "gam", form = y ~ time | id / group, |
|
| 303 |
#' df = simdf, start = NULL, type = "nls" |
|
| 304 |
#' ) |
|
| 305 |
#' fit <- fitGrowth(ss) |
|
| 306 |
#' lmPlot(fit, form = ss$pcvrForm, df = ss$df) |
|
| 307 |
#' @export |
|
| 308 | ||
| 309 |
lmPlot <- function(fit, form, df = NULL, groups = NULL, timeRange = NULL, facetGroups = TRUE,
                   groupFill = FALSE, virMaps = c("plasma")) {
  # lm fits share their plotting logic with nls fits, so delegate to nlsPlot,
  # forwarding every argument by name.
  nlsPlot(
    fit = fit, form = form, df = df, groups = groups, timeRange = timeRange,
    facetGroups = facetGroups, groupFill = groupFill, virMaps = virMaps
  )
}
| 1 |
#' @description |
|
| 2 |
#' Internal function for calculating \eqn{\mu} and \eqn{\kappa} of a distribution represented by single value
|
| 3 |
#' traits. |
|
| 4 |
#' @param s1 A vector of numerics generated from a circular process |
|
| 5 |
#' @examples |
|
| 6 |
#' .conj_vonmises2_sv( |
|
| 7 |
#' s1 = brms::rvon_mises(100, 2, 2), priors = list(mu = 0.5, kappa = 0.5), |
|
| 8 |
#' cred.int.level = 0.95, |
|
| 9 |
#' plot = FALSE |
|
| 10 |
#' ) |
|
| 11 |
#' .conj_vonmises2_sv( |
|
| 12 |
#' s1 = rnorm(20, 90, 20), |
|
| 13 |
#' priors = list(mu = 75, kappa = 0.5, boundary = c(0, 180)), |
|
| 14 |
#' cred.int.level = 0.95, |
|
| 15 |
#' plot = TRUE |
|
| 16 |
#' ) |
|
| 17 |
#' @keywords internal |
|
| 18 |
#' @noRd |
|
| 19 | ||
| 20 |
.conj_vonmises2_sv <- function(s1 = NULL, priors = NULL,
                               plot = FALSE, support = NULL, cred.int.level = NULL,
                               calculatingSupport = FALSE) {
  #* `set support to NULL to avoid default length of 10000`
  # NOTE: the `support` argument is intentionally discarded; a dense support is
  # rebuilt below from the prior's boundary so its resolution is controlled here.
  support <- NULL
  #* `make default prior if none provided`
  # mu/kappa are the von Mises location/concentration; boundary is the scale the
  # data live on; n is the prior pseudo-sample size used as an updating weight.
  default_prior <- list(
    mu = 0, kappa = 0.5,
    boundary = c(-pi, pi),
    n = 1
  )
  if (is.null(priors)) {
    priors <- default_prior
  }
  #* `if any elements are missing from prior then use defaults`
  # fill in any prior element the caller omitted with the default value
  priors <- stats::setNames(lapply(names(default_prior), function(nm) {
    if (nm %in% names(priors)) {
      return(priors[[nm]])
    } else {
      return(default_prior[[nm]])
    }
  }), names(default_prior))
  #* `rescale data to [-pi, pi] according to boundary`
  s1 <- .boundary.to.radians(x = s1, boundary = priors$boundary)
  #* `rescale prior on mu to [-pi, pi] according to boundary`
  mu_radians <- .boundary.to.radians(x = priors$mu, boundary = priors$boundary)
  #* `Raise error if the boundary is wrong and data is not on [-pi, pi]`
  if (any(abs(s1) > pi)) {
    stop(paste0(
      "Values must be on [-pi, pi] after rescaling. ",
      "Does the boundary element in your prior include all your data?"
    ))
  }
  #* `Define dense Support`
  # always TRUE here since support was forced to NULL above
  if (is.null(support)) {
    if (calculatingSupport) {
      return(priors$boundary) #* this would be [-pi, pi] if using radians, but plotting will be on
      #* the original scale so we can just return the boundary and use [-pi, pi] as support here
    }
    # support_boundary is on the original scale, support on radians; both have
    # the same length so densities can be mapped back for plotting
    support_boundary <- seq(min(priors$boundary), max(priors$boundary), by = 0.0005)
    support <- seq(-pi, pi, length.out = length(support_boundary))
  }
  out <- list()
  #* ***** `Updating Kappa`
  # kappa is updated as a sample-size weighted average of the bias-corrected
  # observed concentration and the prior concentration
  n1 <- length(s1)
  obs_kappa <- .unbiased.kappa(s1, n1)
  kappa_prime <- ((obs_kappa * n1) + (priors$kappa * priors$n)) / (n1 + priors$n)
  #* ***** `Updating vMF for mu using kappa prime`
  #* `Get weighted mean of data and prior for half tangent adjustment`
  # the circular mean of data plus prior mean (prior weighted by its pseudo
  # sample size n) determines which half of the unit circle the posterior mean
  # lies in, since atan alone only covers (-pi/2, pi/2)
  cm <- .circular.mean(c(s1, mu_radians), w = c(rep(1, length(s1)), priors$n))
  unitCircleAdj <- ifelse(abs(cm) <= pi / 2, 0, pi)
  unitCircleAdj <- ifelse(cm > 0, 1, -1) * unitCircleAdj
  #* `Update prior parameters`
  a <- priors$kappa # kappa parameter can easily overwhelm mean in updating with low sample size
  # I do not love this currently. It seems like a should be kappa prime, but that very easily
  # overwhelms small samples in the follow formula to update mu, so instead of the sequential
  # updating I am using separate updating.
  # the formula below basically weighs the prior by kappa, so if I update kappa then weigh the prior
  # by that then it is much more biased towards the prior, I think that is worth avoiding with this
  # workaround
  b <- mu_radians
  mu_prime_atan_scale <- atan(((a * sin(b)) + sum(sin(s1))) / ((a * cos(b)) + sum(cos(s1))))
  mu_prime <- unitCircleAdj + mu_prime_atan_scale
  #* `calculate density over support`
  dens1 <- brms::dvon_mises(support, mu_prime, kappa_prime)
  pdf1 <- dens1 / sum(dens1) # normalize to a discrete pdf over the support grid
  #* `calculate highest density interval`
  #* note there is no qvon_mises function, so I am using bayestestR::hdi on
  #* posterior draws and rescaled posterior draws
  draws <- brms::rvon_mises(10000, mu_prime, kappa_prime)
  hdi_v1 <- as.numeric(bayestestR::hdi(draws, ci = cred.int.level))[2:3]
  # second parameterization on [0, 2*pi) guards against intervals that would be
  # artificially split across the -pi/pi wrap point; the narrower interval wins
  draws2 <- draws
  draws2[draws2 < 0] <- draws2[draws2 < 0] + 2 * pi
  hdi_v2 <- as.numeric(bayestestR::hdi(draws2, ci = cred.int.level))[2:3]
  hdis <- list(hdi_v1, hdi_v2)
  hdi <- hdis[[which.min(c(diff(hdi_v1), diff(hdi_v2)))]]
  hdi[hdi > pi] <- hdi[hdi > pi] - (2 * pi) # if the second hdi was narrower then fix the part beyond pi
  #* `store highest density estimate`
  hde <- mu_prime
  #* `Rescale HDI, HDE, and draws, from radians to boundary units`
  hdi_boundary <- .radians.to.boundary(hdi, target = priors$boundary)
  hde_boundary <- .radians.to.boundary(hde, target = priors$boundary)
  draws_boundary <- .radians.to.boundary(draws, target = priors$boundary)
  #* `save summary and parameters`
  out$summary <- data.frame(
    HDE_1 = hde_boundary,
    HDI_1_low = hdi_boundary[1],
    HDI_1_high = hdi_boundary[2]
  )
  out$posterior$mu <- hde_boundary # rescaled mu_prime
  out$posterior$kappa <- kappa_prime
  out$posterior$n <- priors$n + length(s1)
  out$posterior$boundary <- priors$boundary
  #* `Store Posterior Draws`
  out$posteriorDraws <- draws_boundary
  out$pdf <- pdf1
  #* `keep data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support_boundary, "prob" = pdf1,
      "sample" = rep("Sample 1", length(support_boundary))
    )
  } # tests on this seem to work fine
  return(out)
}
|
| 125 | ||
| 126 |
#' @description |
|
| 127 |
#' Internal function for calculating \eqn{\mu} and \eqn{\kappa} of a distribution represented by multi value
|
| 128 |
#' traits. |
|
| 129 |
#' @param s1 A vector of numerics generated from a circular process |
|
| 130 |
#' @examples |
|
| 131 |
#' mv_gauss <- mvSim( |
|
| 132 |
#' dists = list( |
|
| 133 |
#' rnorm = list(mean = 50, sd = 10) |
|
| 134 |
#' ), |
|
| 135 |
#' n_samples = 30 |
|
| 136 |
#' ) |
|
| 137 |
#' .conj_vonmises2_mv( |
|
| 138 |
#' s1 = mv_gauss[, -1], priors = list(mu = 30, kappa = 1, boundary = c(0, 180)), |
|
| 139 |
#' cred.int.level = 0.95, |
|
| 140 |
#' plot = TRUE |
|
| 141 |
#' ) |
|
| 142 |
#' @keywords internal |
|
| 143 |
#' @noRd |
|
| 144 | ||
| 145 |
.conj_vonmises2_mv <- function(s1 = NULL, priors = NULL,
                               plot = FALSE, support = NULL, cred.int.level = NULL,
                               calculatingSupport = FALSE) {
  #* `set support to NULL to avoid default length of 10000`
  # NOTE: the `support` argument is intentionally discarded; a dense support is
  # rebuilt below from the prior's boundary so its resolution is controlled here.
  support <- NULL
  #* `Reorder columns if they are not in the numeric order`
  # strip the non-numeric prefix from histogram column names to recover bin
  # positions (assumes names like "trait.12" — TODO confirm against callers)
  histColsBin <- as.numeric(sub("[a-zA-Z_.]+", "", colnames(s1)))
  bins_order <- sort(histColsBin, index.return = TRUE)$ix
  s1 <- s1[, bins_order]
  #* `Turn s1 matrix into a vector`
  # expand the histogram into a pseudo-sample: each bin value repeated by its
  # (rounded) total count across rows
  X1 <- rep(histColsBin[bins_order], as.numeric(round(colSums(s1))))
  #* `make default prior if none provided`
  default_prior <- list(
    mu = 0, kappa = 0.5,
    boundary = c(-pi, pi),
    n = 1
  )
  if (is.null(priors)) {
    priors <- default_prior
  }
  #* `if any elements are missing from prior then use defaults`
  # fill in any prior element the caller omitted with the default value
  priors <- stats::setNames(lapply(names(default_prior), function(nm) {
    if (nm %in% names(priors)) {
      return(priors[[nm]])
    } else {
      return(default_prior[[nm]])
    }
  }), names(default_prior))
  #* `rescale data to [-pi, pi] according to boundary`
  X1 <- .boundary.to.radians(x = X1, boundary = priors$boundary)
  #* `rescale prior on mu to [-pi, pi] according to boundary`
  mu_radians <- .boundary.to.radians(x = priors$mu, boundary = priors$boundary)
  #* `Raise error if the boundary is wrong and data is not on [-pi, pi]`
  if (any(abs(X1) > pi)) {
    stop(paste0(
      "Values must be on [-pi, pi] after rescaling. ",
      "Does the boundary element in your prior include all your data?"
    ))
  }
  #* `Define dense Support`
  # always TRUE here since support was forced to NULL above
  if (is.null(support)) {
    if (calculatingSupport) {
      return(priors$boundary) #* this would be [-pi, pi] if using radians, but plotting will be on
      #* the original scale so we can just return the boundary and use [-pi, pi] as support here
    }
    # support_boundary is on the original scale, support on radians; both share
    # one length so densities can be mapped back for plotting
    support_boundary <- seq(min(priors$boundary), max(priors$boundary), by = 0.0005)
    support <- seq(-pi, pi, length.out = length(support_boundary))
  }
  out <- list()
  #* ***** `Updating Kappa`
  # kappa is estimated from the expanded pseudo-sample but weighted against the
  # prior by the number of histograms (rows), not the pseudo-sample size
  n1 <- nrow(s1)
  obs_kappa <- .unbiased.kappa(X1, length(X1))
  kappa_prime <- ((obs_kappa * n1) + (priors$kappa * priors$n)) / (n1 + priors$n)
  #* ***** `Updating vMF for mu using kappa prime`
  #* `Get weighted mean of data and prior for half tangent adjustment`
  # pseudo-sample weights are scaled down so the data's total weight equals the
  # number of histograms; this mean picks the correct half of the unit circle,
  # since atan alone only covers (-pi/2, pi/2)
  cm <- .circular.mean(c(X1, mu_radians), w = c(rep(nrow(s1) / length(X1), length(X1)), priors$n))
  unitCircleAdj <- ifelse(abs(cm) <= pi / 2, 0, pi)
  unitCircleAdj <- ifelse(cm > 0, 1, -1) * unitCircleAdj
  #* `Update prior parameters`
  # see the single-value version: the prior mean is weighted by the prior kappa
  # (not kappa_prime) to avoid overwhelming small samples
  a <- priors$kappa
  b <- mu_radians
  mu_prime_atan_scale <- atan(((a * sin(b)) + sum(sin(X1))) / ((a * cos(b)) + sum(cos(X1))))
  mu_prime <- unitCircleAdj + mu_prime_atan_scale
  #* `calculate density over support`
  dens1 <- brms::dvon_mises(support, mu_prime, kappa_prime)
  pdf1 <- dens1 / sum(dens1) # normalize to a discrete pdf over the support grid
  #* `calculate highest density interval`
  #* note there is no qvon_mises function, so I am using bayestestR::hdi on
  #* posterior draws and rescaled posterior draws
  draws <- brms::rvon_mises(10000, mu_prime, kappa_prime)
  hdi_v1 <- as.numeric(bayestestR::hdi(draws, ci = cred.int.level))[2:3]
  # second parameterization on [0, 2*pi) guards against intervals artificially
  # split across the -pi/pi wrap point; the narrower interval wins
  draws2 <- draws
  draws2[draws2 < 0] <- draws2[draws2 < 0] + 2 * pi
  hdi_v2 <- as.numeric(bayestestR::hdi(draws2, ci = cred.int.level))[2:3]
  hdis <- list(hdi_v1, hdi_v2)
  hdi <- hdis[[which.min(c(diff(hdi_v1), diff(hdi_v2)))]]
  hdi[hdi > pi] <- hdi[hdi > pi] - (2 * pi) # if the second hdi was narrower then fix the part beyond pi
  #* `store highest density estimate`
  hde <- mu_prime
  #* `Rescale HDI, HDE, draws, and support from radians to boundary units`
  hdi_boundary <- .radians.to.boundary(hdi, target = priors$boundary)
  hde_boundary <- .radians.to.boundary(hde, target = priors$boundary)
  draws_boundary <- .radians.to.boundary(draws, target = priors$boundary)
  #* `save summary and parameters`
  out$summary <- data.frame(
    HDE_1 = hde_boundary,
    HDI_1_low = hdi_boundary[1],
    HDI_1_high = hdi_boundary[2]
  )
  out$posterior$mu <- hde_boundary # rescaled mu_prime
  out$posterior$kappa <- kappa_prime
  out$posterior$n <- priors$n + nrow(s1)
  out$posterior$boundary <- priors$boundary
  #* `Store Posterior Draws`
  out$posteriorDraws <- draws_boundary
  out$pdf <- pdf1
  #* `keep data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support_boundary, "prob" = pdf1,
      "sample" = rep("Sample 1", length(support_boundary))
    )
  }
  return(out)
}
|
| 250 | ||
| 251 | ||
| 252 | ||
| 253 | ||
| 254 |
#' @description |
|
| 255 |
#' Function calculating inverse of the ratio of first and zeroth order bessel functions. |
|
| 256 |
#' This is a biased estimate of kappa, with small samples sizes the bias is strong. |
|
| 257 |
#' @param x A vector of numerics from a von mises distribution |
|
| 258 |
#' @examples |
|
| 259 |
#' x <- brms::rvon_mises(100, 3, 2) |
|
| 260 |
#' .bessel.inv(mean(cos(x - .circular.mean(x)))) |
|
| 261 |
#' @keywords internal |
|
| 262 |
#' @noRd |
|
| 263 | ||
| 264 |
.bessel.inv <- function(x) {
  # Approximate inverse of A(kappa) = I1(kappa) / I0(kappa), the ratio of
  # first- to zeroth-order modified Bessel functions, via the piecewise
  # approximation of Best & Fisher (1981). Vectorized over x; this is a
  # biased kappa estimate and is bias-corrected elsewhere for small samples.
  low <- 2 * x + x^3 + (5 * x^5) / 6 # small resultant lengths
  mid <- -0.4 + 1.39 * x + 0.43 / (1 - x) # intermediate range
  high <- 1 / (x^3 - 4 * x^2 + 3 * x) # resultant lengths near 1
  ifelse(0 <= x & x < 0.53, low, ifelse(x < 0.85, mid, high))
}
|
| 273 | ||
| 274 |
#' @description |
|
| 275 |
#' helper function to estimate kappa based on data generated from a von mises distribution. |
|
| 276 |
#' This is used from the CircStats package. |
|
| 277 |
#' @param x sample of numeric draws from a von-mises distribution |
|
| 278 |
#' @param n the number of samples used. If NULL, the default, then length(x) is used. If this is <16 |
|
| 279 |
#' then the bias adjustment from Best, D. and Fisher N. (1981) is used. |
|
| 280 |
#' @examples |
|
| 281 |
#' .unbiased.kappa(brms::rvon_mises(15, 3, 2)) |
|
| 282 |
#' @keywords internal |
|
| 283 |
#' @noRd |
|
| 284 | ||
| 285 |
.unbiased.kappa <- function(x, n = NULL) {
  # Estimate the von Mises concentration parameter kappa from draws `x`.
  # `n` is the sample size used for bias correction and defaults to length(x);
  # when n < 16 the Best & Fisher (1981) small-sample adjustment is applied.
  if (is.null(n)) {
    n <- length(x)
  }
  # mean resultant length relative to the circular mean, inverted to kappa
  resultant <- mean(cos(x - .circular.mean(x)))
  est <- .bessel.inv(resultant)
  if (n >= 16) {
    return(est)
  }
  # small-sample bias correction: shrink toward zero (never below it) for
  # weakly concentrated samples, otherwise apply the cubic adjustment
  if (est < 2) {
    max(est - 2 * (n * est)^-1, 0)
  } else {
    ((n - 1)^3 * est) / (n^3 + n)
  }
}
| 1 |
#' Helper function for parsing pcvr formulas used in growthSS and downstream functions |
|
| 2 |
#' |
|
| 3 |
#' @param form The pcvr style formula specifying outcome, predictor, individuals, and groups. |
|
| 4 |
#' @param df The data that will be used to fit the model |
|
| 5 |
#' |
|
| 6 |
#' @keywords internal |
|
| 7 |
#' @noRd |
|
| 8 | ||
| 9 |
.parsePcvrForm <- function(form, df = NULL) {
  # Parse a pcvr formula (Y ~ X, Y ~ X | group, or Y ~ X | individual / group)
  # into its component names plus flags for whether grouping/individual terms
  # should be used, returning the (possibly augmented and NA-filtered) data.
  #* `parse form argument`
  y <- as.character(form)[2]
  x <- as.character(form)[3]
  USEGROUP <- FALSE
  if (grepl("\\|", x) && grepl("\\/", x)) { # Y ~ X per id within group
    x3 <- trimws(strsplit(x, "[|]|[/]")[[1]])
    x <- x3[1]
    individual <- x3[2]
    group <- x3[length(x3)]
    USEINDIVIDUAL <- TRUE
    if (!is.null(df)) {
      if (length(unique(df[[group]])) == 1) {
        USEGROUP <- FALSE
      } else {
        USEGROUP <- TRUE
      } # if there is only one group then ignore grouping
    }
  } else if (grepl("\\|", x)) { # Y ~ X by group
    x2 <- trimws(strsplit(x, "[|]")[[1]])
    x <- x2[1]
    individual <- "dummyIndividual"
    # Bug fix: guard the assignment with !is.null(df) like the other branches.
    # Previously `df[[individual]] <- ...` on a NULL df silently promoted df to
    # a list, so $data came back as a list instead of NULL when no data was given.
    if (!is.null(df)) {
      df[[individual]] <- "dummyIndividual"
    }
    group <- x2[length(x2)]
    USEGROUP <- TRUE
    USEINDIVIDUAL <- FALSE
  } else { # Y ~ X
    x2 <- trimws(strsplit(x, "[|]")[[1]])
    x <- x2[1]
    individual <- "dummyIndividual"
    group <- "dummyGroup"
    if (!is.null(df)) {
      df[[individual]] <- "dummyIndividual"
      df[[group]] <- "dummyGroup"
    }
    USEGROUP <- FALSE
    USEINDIVIDUAL <- FALSE
  }
  # An additive term on the RHS (e.g. y ~ time + covar | ...) is split off as a
  # hierarchical predictor.
  if (grepl("\\+", x)) {
    x_components <- lapply(strsplit(x, "\\+"), trimws)
    x <- x_components[[1]][1]
    hierarchical_predictor <- x_components[[1]][2]
  } else {
    hierarchical_predictor <- NULL
  }
  if (!is.null(df)) {
    # Drop rows with NAs in any of the model columns; errors (e.g. a named
    # column missing from df) are deliberately swallowed so df passes through.
    tryCatch(
      {
        df <- df[complete.cases(df[, c(x, y, individual, group)]), ]
      },
      error = function(err) {}
    )
  }
  return(list(
    "y" = y, "x" = x, "individual" = individual, "group" = group,
    "USEG" = USEGROUP, "USEID" = USEINDIVIDUAL, "data" = df,
    "hierarchical_predictor" = hierarchical_predictor
  ))
}
| 1 |
#' Earth Mover's Distance between spectral histograms |
|
| 2 |
#' |
|
| 3 |
#' @description pcv.emd can be used to calculate Earth Mover's Distance between pairwise histograms |
|
| 4 |
#' in a wide dataframe of multi value traits. This is expected to be used with output from \code{mv_ag}.
|
|
| 5 |
#' See also \link{pcv.euc} for euclidean distance between histograms.
|
|
| 6 |
#' |
|
| 7 |
#' @param df Data frame to use with multi value traits in wide format or long format |
|
| 8 |
#' @param cols Columns to use. Defaults to NULL in which case all columns are used. |
|
| 9 |
#' Single strings will be used to regex a pattern in column names (see examples). |
|
| 10 |
#' A vector of names, positions, or booleans will also work. |
|
| 11 |
#' For long data this is taken as a regex pattern (or full name) |
|
| 12 |
#' to use in filtering the trait column. |
|
| 13 |
#' @param reorder Should data be reordered to put similar rows together in the resulting plot? |
|
| 14 |
#' This takes a vector of column names of length 1 or more (see examples). |
|
| 15 |
#' @param include if a long dataframe is returned then these columns will be added to the dataframe, |
|
| 16 |
#' labelled for i and j (the row positions for compared histograms). |
|
| 17 |
#' If a matrix is returned then this information is stored in the row names. |
|
| 18 |
#' This defaults to reorder. |
|
| 19 |
#' @param mat Logical, should data be returned as an nrow x nrow matrix or as a long dataframe? |
|
| 20 |
#' By Default this is FALSE and a long dataframe is returned. |
|
| 21 |
#' Both options are comparable in terms of speed, |
|
| 22 |
#' although for large datasets the matrix version may be slightly faster. |
|
| 23 |
#' @param plot Logical, should a plot be returned? For a matrix this is made with heatmap(), |
|
| 24 |
#' for a dataframe this uses ggplot. |
|
| 25 |
#' @param parallel Number of cores to use. Defaults to 1 unless the "mc.cores" option is set. |
|
| 26 |
#' @param trait Column name for long data to identify traits. This defaults to "trait". If this and |
|
| 27 |
#' value are in the column names of the data then it is assumed to be in long format, |
|
| 28 |
#' otherwise it is assumed to be in wide format. |
|
| 29 |
#' @param id A vector of column names that uniquely identifies observations if the |
|
| 30 |
#' data is in long format. Defaults to "image". |
|
| 31 |
#' @param value A column name for the values to be drawn from in long data. |
|
| 32 |
#' Defaults to "value". |
|
| 33 |
#' @param raiseError Logical, should warnings/errors be raised for potentially large output? |
|
| 34 |
#' It is easy to ask for very many comparisons with this function so the goal of this argument |
|
| 35 |
#' is to catch a few of those and give estimates of how much time something may take. |
|
| 36 |
#' If the function is expected to take very long then a warning or an error is raised. |
|
| 37 |
#' If this is set to FALSE then no time estimates are made. |
|
| 38 |
#' @param method Which method to use (one of "emd" or "euc"). Defaults to "emd". |
|
| 39 |
#' @import ggplot2 |
|
| 40 |
#' @import parallel |
|
| 41 |
#' @return A dataframe/matrix (if plot=FALSE) or a list with a dataframe/matrix and
|
| 42 |
#' a ggplot (if plot=TRUE). The returned data contains pairwise EMD values. |
|
| 43 |
#' |
|
| 44 |
#' @keywords emd earth-mover's-distance multi-value histogram |
|
| 45 |
#' @examples |
|
| 46 |
#' |
|
| 47 |
#' |
|
| 48 |
#' set.seed(123) |
|
| 49 |
#' test <- mvSim( |
|
| 50 |
#' dists = list( |
|
| 51 |
#' runif = list(min = 0, max = 100), |
|
| 52 |
#' rnorm = list(mean = 90, sd = 20) |
|
| 53 |
#' ), |
|
| 54 |
#' n_samples = 10 |
|
| 55 |
#' ) |
|
| 56 |
#' test$meta1 <- rep(LETTERS[1:3], length.out = nrow(test)) |
|
| 57 |
#' test$meta2 <- rep(LETTERS[4:5], length.out = nrow(test)) |
|
| 58 |
#' |
|
| 59 |
#' x <- pcv.emd( |
|
| 60 |
#' df = test, cols = "sim", reorder = "group", |
|
| 61 |
#' include = c("meta1", "meta2"), mat = FALSE,
|
|
| 62 |
#' plot = FALSE, parallel = 1 |
|
| 63 |
#' ) |
|
| 64 |
#' head(x) |
|
| 65 |
#' x2 <- pcv.emd( |
|
| 66 |
#' df = test, cols = "sim", reorder = "group", |
|
| 67 |
#' include = c("meta1", "meta2"), mat = FALSE,
|
|
| 68 |
#' plot = FALSE, parallel = 1, method = "euc" |
|
| 69 |
#' ) |
|
| 70 |
#' head(x2) |
|
| 71 |
#' |
|
| 72 |
#' \donttest{
|
|
| 73 |
#' library(data.table) |
|
| 74 |
#' file <- paste0( |
|
| 75 |
#' "https://media.githubusercontent.com/media/joshqsumner/", |
|
| 76 |
#' "pcvrTestData/main/pcv4-multi-value-traits.csv" |
|
| 77 |
#' ) |
|
| 78 |
#' df1 <- read.pcv(file, "wide", reader = "fread") |
|
| 79 |
#' |
|
| 80 |
#' df1$genotype <- substr(df1$barcode, 3, 5) |
|
| 81 |
#' df1$genotype <- ifelse(df1$genotype == "002", "B73", |
|
| 82 |
#' ifelse(df1$genotype == "003", "W605S", |
|
| 83 |
#' ifelse(df1$genotype == "004", "MM", "Mo17") |
|
| 84 |
#' ) |
|
| 85 |
#' ) |
|
| 86 |
#' df1$fertilizer <- substr(df1$barcode, 8, 8) |
|
| 87 |
#' df1$fertilizer <- ifelse(df1$fertilizer == "A", "100", |
|
| 88 |
#' ifelse(df1$fertilizer == "B", "50", "0") |
|
| 89 |
#' ) |
|
| 90 |
#' |
|
| 91 |
#' tryCatch( |
|
| 92 |
#' {
|
|
| 93 |
#' w <- pcv.emd(df1, |
|
| 94 |
#' cols = "hue_frequencies", reorder = c("fertilizer", "genotype"),
|
|
| 95 |
#' mat = FALSE, plot = TRUE, parallel = 1 |
|
| 96 |
#' ) |
|
| 97 |
#' }, |
|
| 98 |
#' error = function(err) {
|
|
| 99 |
#' message(err) |
|
| 100 |
#' } |
|
| 101 |
#' ) |
|
| 102 |
#' |
|
| 103 |
#' # Note on computational complexity |
|
| 104 |
#' # This scales as O^2, see the plot below for some idea |
|
| 105 |
#' # of the time for different input data sizes. |
|
| 106 |
#' emdTime <- function(x, n = 1) {
|
|
| 107 |
#' x^2 / n * 0.0023 |
|
| 108 |
#' } |
|
| 109 |
#' plot( |
|
| 110 |
#' x = c(18, 36, 54, 72, 108, 135), y = c(0.74, 2.89, 6.86, 10.99, 26.25, 42.44), |
|
| 111 |
#' xlab = "N Input Images", ylab = "time (seconds)" |
|
| 112 |
#' ) # benchmarked test data |
|
| 113 |
#' lines(x = 1:150, y = emdTime(1:150)) # exponential function |
|
| 114 |
#' |
|
| 115 |
#' plot( |
|
| 116 |
#' x = 1:1000, y = emdTime(1:1000), type = "l", |
|
| 117 |
#' xlab = "N Input Images", ylab = "time (seconds)" |
|
| 118 |
#' ) |
|
| 119 |
#' } |
|
| 120 |
#' |
|
| 121 |
#' @export |
|
| 122 |
#' |
|
| 123 |
pcv.emd <- function(df, cols = NULL, reorder = NULL, include = reorder, mat = FALSE, plot = TRUE,
                    parallel = getOption("mc.cores", 1), trait = "trait", id = "image",
                    value = "value", raiseError = TRUE, method = "emd") {
  # Compute pairwise Earth Mover's (or euclidean) distances between histograms.
  # Data is treated as long format when both the trait and value columns exist.
  is_long <- all(c(trait, value) %in% colnames(df))
  # Optionally sort rows so similar groups sit together in the resulting plot.
  if (!is.null(reorder)) {
    df <- df[order(interaction(df[, reorder])), ]
  }
  # Dispatch to the long- or wide-format pairwise distance routine.
  out_data <- if (is_long) {
    .longEmdCalculation(
      df, cols, trait, raiseError,
      parallel, id, value, include, mat, method
    )
  } else {
    .wideEmdCalculation(df, cols, raiseError, parallel, id, include, mat, method)
  }
  if (!plot) {
    return(out_data)
  }
  # Matrix output is drawn with a base heatmap, long output with a ggplot tile map.
  if (mat) {
    p <- stats::heatmap(out_data)
  } else {
    p <- ggplot2::ggplot(out_data, ggplot2::aes(x = .data$i, y = .data$j, fill = .data[[method]])) +
      ggplot2::geom_tile(color = NA) +
      ggplot2::labs(fill = method) +
      ggplot2::theme_minimal() +
      ggplot2::theme(
        axis.line.x.bottom = ggplot2::element_line(), axis.line.y.left = ggplot2::element_line(),
        legend.position = "bottom"
      )
  }
  return(list("data" = out_data, "plot" = p))
}
|
| 163 | ||
| 164 | ||
| 165 |
#' Error Raising function for very long EMD calculation times |
|
| 166 |
#' @keywords internal |
|
| 167 |
#' @noRd |
|
| 168 | ||
| 169 |
.emdRaiseError <- function(raiseError, df, parallel, trait = NULL) {
  # Estimate how long the pairwise EMD calculation will take and signal a
  # message (fast), warning (minutes), or error (an hour or more) accordingly.
  # Does nothing when raiseError is FALSE.
  #
  # Number of histograms to compare: rows for wide data, rows-per-trait for long data.
  n <- ifelse(is.null(trait), nrow(df), nrow(df) / length(unique(df[[trait]])))
  if (raiseError) {
    # rough empirical scaling of pairwise calculation time (seconds) with cores
    et_sec <- 0.00125 * ((n / parallel)^2)
    et_min <- et_sec / 60
    et_hour <- et_min / 60
    if (et_sec <= 300) {
      message(paste0(
        "Estimated time of calculation is roughly ", round(et_sec, 1),
        " seconds using ", parallel, " cores in parallel."
      ))
    } else if (et_min < 60) {
      warning(paste0(
        "Estimated time of calculation is roughly ", round(et_min, 2),
        " minutes using ", parallel, " cores in parallel."
      ))
    } else {
      # Bug fix: this was `else if (et_min > 60)`, which silently did nothing
      # when et_min == 60 exactly; a plain else makes the branches exhaustive.
      stop(paste0(
        "Stopping, estimated time of calculation is roughly ", round(et_hour, 2),
        " hours using ", parallel, " cores in parallel.",
        "\nIf you wish to proceed then rerun this command with raiseError=FALSE"
      ))
    }
  }
}
|
| 194 | ||
| 195 |
#' Long EMD calculation |
|
| 196 |
#' @keywords internal |
|
| 197 |
#' @noRd |
|
| 198 | ||
| 199 |
.longEmdCalculation <- function(df, cols, traitCol, raiseError, parallel, id,
                                value, include, mat, method) {
  # Pairwise distances between histograms stored in long format. Returns a
  # symmetric matrix (mat = TRUE) or a long dataframe of i/j pairs.
  #
  # Resolve the 1D distance function by name ("emd" -> emd1d, "euc" -> euc1d).
  dist_1d <- match.fun(paste0(method, "1d"))
  # Keep only the rows whose trait matches the cols pattern/name.
  df <- df[grepl(cols, df[[traitCol]]), ]
  #* `raise error`
  .emdRaiseError(raiseError, df, parallel, trait = traitCol)
  #* `calculate emd`
  # One identifier per histogram, built from the id column(s).
  df$INNER_ID_EMD <- interaction(df[, id], drop = TRUE)
  ids <- unique(df$INNER_ID_EMD) # hoisted: loop-invariant, was recomputed per iteration
  if (mat) { # make dist matrix
    mat_obj <- matrix(0,
      nrow = length(ids),
      ncol = length(ids)
    )
    # only the lower triangle is computed; the matrix is mirrored afterwards
    values <- unlist(lapply(seq_along(ids), function(i_n) {
      parallel::mclapply(seq_along(ids), function(j_n) {
        i <- ids[i_n]
        j <- ids[j_n]
        if (i_n < j_n) {
          dist_1d(
            as.numeric(df[df$INNER_ID_EMD == as.character(i), value]),
            as.numeric(df[df$INNER_ID_EMD == as.character(j), value])
          )
        }
      }, mc.cores = parallel)
    }))
    mat_obj[lower.tri(mat_obj)] <- values
    tmat_obj <- t(mat_obj)
    mat_obj[upper.tri(mat_obj)] <- tmat_obj[upper.tri(tmat_obj)]
    rownames(mat_obj) <- colnames(mat_obj) <- ids
    out_data <- mat_obj
  } else { # make long data
    out_data <- do.call(rbind, lapply(seq_along(ids), function(i_n) {
      do.call(rbind, parallel::mclapply(seq_along(ids), function(j_n) {
        i <- ids[i_n]
        j <- ids[j_n]
        emdOut <- NULL
        if (i_n == j_n) {
          emdOut <- 0
        } else if (i_n < j_n) {
          emdOut <- dist_1d(
            as.numeric(df[df$INNER_ID_EMD == as.character(i), value]),
            as.numeric(df[df$INNER_ID_EMD == as.character(j), value])
          )
        }
        # each computed pair is emitted twice (i,j and j,i) so the long output
        # covers the full symmetric grid
        if (!is.null(emdOut)) {
          if (!is.null(include)) {
            x <- rbind(
              data.frame(
                i = i, j = j, emd = emdOut,
                df[df$INNER_ID_EMD == as.character(i), include][1, ],
                df[df$INNER_ID_EMD == as.character(j), include][1, ]
              ),
              data.frame(
                i = j, j = i, emd = emdOut,
                df[df$INNER_ID_EMD == as.character(j), include][1, ],
                df[df$INNER_ID_EMD == as.character(i), include][1, ]
              )
            )
            colnames(x) <- c("i", "j", method, paste0(include, "_i"), paste0(include, "_j"))
          } else {
            x <- data.frame(i = c(i, j), j = c(j, i), emd = emdOut)
            # Bug fix: the distance column was hard-coded as "emd" in this
            # branch, which broke downstream plotting (fill = .data[[method]])
            # whenever method = "euc" and include was NULL.
            colnames(x) <- c("i", "j", method)
          }
          x
        }
      }, mc.cores = parallel))
    }))
  }
  return(out_data)
}
|
| 268 | ||
| 269 |
#* wide EMD calculation
#' Pairwise distances between rows of a wide dataframe of histogram counts.
#'
#' Each row of `df` is treated as one histogram (over the `cols` bins) and
#' `dist_1d` (e.g. emd1d or euc1d, resolved from `method`) is applied to every
#' unordered pair of rows. Returns either a symmetric n x n matrix
#' (`mat = TRUE`) or a long dataframe with one row per ordered pair
#' (both i,j and j,i) plus optional `include` metadata columns.
#' @keywords internal
#' @noRd
.wideEmdCalculation <- function(df, cols, raiseError, parallel, id, include, mat, method) {
  # Resolve the 1D distance function by name, e.g. method "emd" -> emd1d().
  dist_1d <- match.fun(paste0(method, "1d"))
  if (is.null(cols)) {
    cols <- colnames(df) # default: every column is a histogram bin
  } else if (is.character(cols) && length(cols) == 1) {
    # A single string is treated as a regex over column names and becomes a
    # logical column mask.
    cols <- grepl(cols, colnames(df))
  }
  #* `raise error`
  # Optionally warn/stop about the size of the pairwise computation.
  .emdRaiseError(raiseError, df, parallel, trait = NULL)
  #* `calculate emd`
  if (mat) { # make dist matrix
    mat_obj <- matrix(0, nrow = nrow(df), ncol = nrow(df))
    # Compute each unordered pair once (i < j). Skipped pairs yield NULL,
    # which unlist() drops, leaving the upper-triangle values in row-major
    # order. mclapply parallelizes the inner loop across `parallel` cores.
    values <- unlist(lapply(seq_len(nrow(df)), function(i) {
      parallel::mclapply(seq_len(nrow(df)), function(j) {
        if (i < j) {
          dist_1d(as.numeric(df[i, cols]), as.numeric(df[j, cols]))
        }
      }, mc.cores = parallel)
    }))
    # Row-major upper-triangle order equals column-major lower-triangle order
    # (pair (i,j) lands at [j,i]), so this assignment is consistent and the
    # mirror step below completes the symmetric matrix.
    mat_obj[lower.tri(mat_obj)] <- values
    tmat_obj <- t(mat_obj)
    mat_obj[upper.tri(mat_obj)] <- tmat_obj[upper.tri(tmat_obj)]
    rownames(mat_obj) <- colnames(mat_obj) <- seq_len(nrow(df))
    out_data <- mat_obj
    if (!is.null(include)) {
      # Label rows by the interaction of the metadata columns.
      # NOTE(review): only rownames are relabeled; colnames keep the numeric
      # indices -- confirm asymmetric dimnames are intended.
      rownames(out_data) <- interaction(df[, include])
    }
  } else { # make long dataframe
    # Long output: each unordered pair is emitted twice (i,j and j,i) so the
    # result is symmetric in long form; the diagonal is an explicit 0.
    out_data <- do.call(rbind, lapply(seq_len(nrow(df)), function(i) {
      do.call(rbind, parallel::mclapply(seq_len(nrow(df)), function(j) {
        emdOut <- NULL
        if (i == j) {
          emdOut <- 0
        } else if (i < j) {
          emdOut <- dist_1d(as.numeric(df[i, cols]), as.numeric(df[j, cols]))
        }
        # i > j pairs leave emdOut NULL and contribute nothing; the mirrored
        # row was already added when the (j, i) pair was processed.
        if (!is.null(emdOut)) {
          if (!is.null(include)) {
            x <- rbind(
              data.frame(i = i, j = j, emd = emdOut, df[i, include], df[j, include]),
              data.frame(i = j, j = i, emd = emdOut, df[j, include], df[i, include])
            )
            # Rename the distance column to the method name ("emd"/"euc") and
            # suffix metadata from each side of the pair with _i / _j.
            colnames(x) <- c("i", "j", method, paste0(include, "_i"), paste0(include, "_j"))
          } else {
            x <- data.frame(i = c(i, j), j = c(j, i), emd = emdOut)
          }
          x
        }
      }, mc.cores = parallel))
    }))
  }
  return(out_data)
}
|
| 326 | ||
| 327 |
#' Earth Mover's Distance between spectral histograms |
|
| 328 |
#' |
|
| 329 |
#' @description emd1d computes 1 dimension EMD for two samples. |
|
| 330 |
#' |
|
| 331 |
#' @param s1 Histogram as a numeric vector of counts per position. |
|
| 332 |
#' @param s2 Histogram as a numeric vector of counts per position. Must be the same length as s1. |
|
| 333 |
#' |
|
| 334 |
#' @keywords internal |
|
| 335 |
#' @return Returns EMD between two samples as a numeric. |
|
| 336 |
#' @examples |
|
| 337 |
#' |
|
| 338 |
#' set.seed(123) |
|
| 339 |
#' s1 <- hist(rnorm(10000, 50, 10), breaks = seq(1, 100, 1))$counts |
|
| 340 |
#' s2 <- hist(rlnorm(9000, log(30), 0.25), breaks = seq(1, 100, 1))$counts |
|
| 341 |
#' plot(s2, type = "l"); lines(s1) |
|
| 342 |
#' emd1d(s1, s2) |
|
| 343 |
#' |
|
| 344 |
#' @noRd |
|
| 345 |
#' |
|
| 346 |
emd1d <- function(s1, s2) {
  # Positionwise comparison only makes sense for histograms over the same
  # breaks, so the bin counts must align.
  if (length(s1) != length(s2)) {
    stop("Samples must be from the same histogram and be of the same length")
  }
  # Normalize counts to probability mass so samples of different total size
  # are comparable.
  s1 <- s1 / sum(s1)
  s2 <- s2 / sum(s2)
  # 1D EMD is the L1 norm of the cumulative difference between the two
  # distributions. cumsum() replaces the former element-wise for loop and is
  # bit-identical: it performs the same left-to-right additions, and the
  # loop's leading emd_iter[1] == 0 contributed nothing to the sum.
  return(sum(abs(cumsum(s1 - s2))))
}
|
| 358 | ||
| 359 |
#' Euclidean Distance between spectral histograms |
|
| 360 |
#' |
|
| 361 |
#' @description euc1d computes euclidean distance between two samples. |
|
| 362 |
#' |
|
| 363 |
#' @param s1 Histogram as a numeric vector of counts per position. |
|
| 364 |
#' @param s2 Histogram as a numeric vector of counts per position. Must be the same length as s1. |
|
| 365 |
#' |
|
| 366 |
#' @importFrom stats dist |
|
| 367 |
#' @keywords internal |
|
| 368 |
#' @return Returns euclidean distance as a numeric |
|
| 369 |
#' |
|
| 370 |
#' @noRd |
|
| 371 | ||
| 372 |
euc1d <- function(s1, s2) {
  # Distances are only meaningful for histograms over the same breaks.
  if (length(s1) != length(s2)) {
    stop("Samples must be from the same histogram and be of the same length")
  }
  # Normalize each histogram to a probability distribution before comparing
  # so differing sample sizes do not dominate the distance.
  p1 <- s1 / sum(s1)
  p2 <- s2 / sum(s2)
  # stats::dist() on a two-row matrix yields the single pairwise euclidean
  # distance between the two normalized histograms.
  pair <- matrix(c(p1, p2), nrow = 2, byrow = TRUE)
  as.numeric(stats::dist(pair, method = "euclidean"))
}
|
| 382 | ||
| 383 |
#' @rdname pcv.emd
#' @export
#'
pcv.euc <- function(df, cols = NULL, reorder = NULL, include = reorder, mat = FALSE, plot = TRUE,
                    parallel = getOption("mc.cores", 1), trait = "trait", id = "image",
                    value = "value", raiseError = TRUE, method = "euc") {
  # Thin convenience wrapper around pcv.emd(): identical arguments, but the
  # method default is "euc" so pairwise distances are computed with euc1d
  # (euclidean) instead of emd1d (earth mover's distance).
  pcv.emd(df, cols, reorder, include, mat, plot, parallel, trait, id, value, raiseError, method)
}
| 1 |
#' Helper function to put together formulae for brms changepoint growth models |
|
| 2 |
#' |
|
| 3 |
#' @param model A multi-part model passed from brmSS passed from \code{\link{growthSS}}
|
|
| 4 |
#' @param x The x variable from the pcvrForm argument in \code{\link{growthSS}}
|
|
| 5 |
#' @param y The y variable from the pcvrForm argument in \code{\link{growthSS}}
|
|
| 6 |
#' @param group The grouping variable from the pcvrForm argument in \code{\link{growthSS}}
|
|
| 7 |
#' @param dpar Logical, is this a distributional parameter formula (TRUE) or part of the main growth |
|
| 8 |
#' formula (FALSE)? |
|
| 9 |
#' @param nTimes a Number of times that are present in the data, only used for making splines have a |
|
| 10 |
#' workable number of knots. |
|
| 11 |
#' @param useGroup logical, should groups be used? |
|
| 12 |
#' @param priors A list describing priors in the style of \code{\link{brmSS}}, \code{\link{growthSS}},
|
|
| 13 |
#' and \code{\link{growthSim}}. This is only used currently to identify fixed and estimated
|
|
| 14 |
#' changepoints. If a changepoint is called "changePointX" with X being its position in the formula |
|
| 15 |
#' then it will be estimated as a parameter in the model, but if the changepoint is called |
|
| 16 |
#' "fixedChangePointX" then it will be passed as a numeric in the growth model. |
|
| 17 |
#' @param int logical, should an intercept be modeled? |
|
| 18 |
#' |
|
| 19 |
#' @examples |
|
| 20 |
#' df1 <- do.call(rbind, lapply(1:30, function(i) {
|
|
| 21 |
#' chngpt <- rnorm(2, 10, 1.5) |
|
| 22 |
#' A <- growthSim("linear", n = 1, t = chngpt[1], params = list("A" = c(1)))
|
|
| 23 |
#' B <- growthSim("linear", n = 1, t = chngpt[2], params = list("A" = c(0.9)))
|
|
| 24 |
#' B$group <- "b" |
|
| 25 |
#' x <- rbind(A, B) |
|
| 26 |
#' x$id <- paste0("id_", i)
|
|
| 27 |
#' x |
|
| 28 |
#' })) |
|
| 29 |
#' df2 <- growthSim("linear", n = 30, t = 20, params = list("A" = c(4.1, 5)))
|
|
| 30 |
#' df2 <- do.call(rbind, lapply(unique(paste0(df2$id, df2$group)), function(int) {
|
|
| 31 |
#' df1sub <- df1[paste0(df1$id, df1$group) == int, ] |
|
| 32 |
#' df2sub <- df2[paste0(df2$id, df2$group) == int, ] |
|
| 33 |
#' y_end <- df1sub[df1sub$time == max(df1sub$time), "y"] |
|
| 34 |
#' df2sub$time <- df2sub$time + max(df1sub$time) |
|
| 35 |
#' df2sub$y <- y_end + df2sub$y |
|
| 36 |
#' df2sub |
|
| 37 |
#' })) |
|
| 38 |
#' df <- rbind(df1, df2) |
|
| 39 |
#' ggplot(df, aes(time, y, group = interaction(group, id))) + |
|
| 40 |
#' geom_line(aes(color = group)) + |
|
| 41 |
#' theme_minimal() |
|
| 42 |
#' |
|
| 43 |
#' .brmsChangePointHelper(model = "linear + linear", x = "time", y = "y", group = "group") |
|
| 44 |
#' |
|
| 45 |
#' @keywords internal |
|
| 46 |
#' @noRd |
|
| 47 | ||
| 48 |
.brmsChangePointHelper <- function(model, x, y, group, dpar = FALSE,
                                   nTimes = 25, useGroup, priors, int = FALSE) {
  # Split "linear + logistic" style model strings into per-segment names.
  component_models <- trimws(strsplit(model, "\\+")[[1]])
  models <- c(
    "logistic", "gompertz", "monomolecular", "exponential", "linear", "power law", "gam",
    "spline", "int", "homo", "weibull", "frechet", "gumbel", "logarithmic"
  )

  # In a distributional-parameter sub-model all parameter names are prefixed
  # with the dpar name (passed here as y); the main growth model has no prefix.
  if (dpar) {
    prefix <- y
  } else {
    prefix <- NULL
  }

  # Regex matching prior names that belong to the main growth model: any name
  # starting with a model name, "changePoint", or "fixedChangePoint".
  mainGrowthModelPriorStrings <- paste(
    paste0("^", gsub(
      " ", "",
      c(models, "changePoint", "fixedChangePoint")
    )),
    collapse = "|"
  )
  # Keep only the priors relevant to this (sub)model so the *ChngptForm
  # helpers see the correct set of changepoints.
  if (dpar) {
    priors <- priors[grepl(prefix, names(priors))]
  } else {
    priors <- priors[grepl(mainGrowthModelPriorStrings, names(priors))]
  }
  # else should be any prior whose name starts with a model name,
  # with changePoint or with fixedChangePoint.

  # Build the per-segment formula pieces (form, cp, cpInt, params) by
  # dispatching to the matching .<model>ChngptForm helper.
  formulae <- lapply(seq_along(component_models), function(i) {
    iter_model <- component_models[i]

    # "decay" is a modifier, not a model: strip it and negate the form later.
    if (grepl("decay", iter_model)) {
      decay <- TRUE
      iter_model <- trimws(gsub("decay", "", iter_model))
    } else {
      decay <- FALSE
    }

    matched_iter_model <- match.arg(iter_model, models)
    matched_iter_model <- gsub("homo", "int", matched_iter_model) # recoding
    matched_iter_model <- gsub("spline", "gam", matched_iter_model) # recoding

    # Resolve e.g. ".linearChngptForm" by name and call it for segment i.
    chngptFormFun <- get(paste0(".", gsub(" ", "", matched_iter_model), "ChngptForm"))
    iter <- chngptFormFun(x, i, dpar = prefix, priors)
    if (decay) {
      iter <- .decayChngptForm(iter)
    }
    return(iter)
  })

  params <- unique(unlist(lapply(formulae, function(f) {
    f$params
  })))
  # NOTE(review): the final parameter is dropped here -- presumably the last
  # segment's trailing changepoint, which is never estimated; confirm intent.
  params <- params[-length(params)]

  if (int) { # for changepoint models with an intercept add I term to formula and params
    formula_starter_string <- paste0(y, " ~ ", prefix, "I + ")
    params <- append(params, paste0(prefix, "I"))
  } else {
    formula_starter_string <- paste0(y, " ~ ")
  }

  # First segment: its form is active while its inv_logit changepoint gate
  # (cp) is ~1.
  growthForm <- paste0(formula_starter_string, formulae[[1]]$form, " * ", formulae[[1]]$cp)
  #* Make cpInt cumulative
  # Each segment's cpInt (its value at the end of the phase) is summed with
  # all earlier cpInts so later phases start from the accumulated height.
  # Note: iteration i reads formulae[[o]]$cpInt for o < i that were already
  # overwritten in earlier iterations, so the accumulation nests.
  for (i in 2:length(formulae)) {
    cumulativeCpInt <- do.call(paste, list(lapply(1:i, function(o) {
      formulae[[o]]$cpInt
    }), collapse = " + "))
    formulae[[i]]$cpInt <- cumulativeCpInt
  }
  # assemble segments into complete formula
  # Each later phase contributes (previous height + its own form) gated by
  # its changepoint switch.
  for (i in 2:length(formulae)) {
    nextPhase <- paste0(
      "+ (", formulae[[(i - 1)]]$cpInt, " + ", formulae[[i]]$form, ") * ",
      formulae[[i]]$cp
    )
    growthForm <- paste0(growthForm, nextPhase)
  }
  # Converting the assembled string is where malformed segment strings (e.g.
  # missing/unnamed changepoint priors) surface; re-raise with a hint.
  tryCatch(
    expr = {
      growthForm <- stats::as.formula(growthForm)
    },
    error = function(err) {
      message(paste0(
        "Error while assembling changepoint formula, did you specify priors for ",
        paste(params, collapse = ", "), "? Changepoint priors must be named."
      ))
      message("The original Error message is:")
      stop(conditionMessage(err))
    }
  )
  # NOTE(review): growthForm was already converted inside the tryCatch expr
  # (evaluated in this frame); this second as.formula() is a harmless no-op.
  growthForm <- stats::as.formula(growthForm)

  # Distributional-parameter formulas must be wrapped as brms non-linear
  # formulas.
  if (dpar) {
    growthForm <- brms::nlf(growthForm)
  }

  # Segments built by the gam/spline helper carry a splineVar element; those
  # need an auxiliary smooth formula.
  splineSegments <- which(unlist(lapply(formulae, function(fml) {
    "splineVar" %in% names(fml)
  })))

  splineForm <- .handleSplineSegments(splineSegments, useGroup, group, nTimes, formulae, x)

  return(list("growthForm" = growthForm, "pars" = params, "splineHelperForm" = splineForm))
}
|
| 154 | ||
| 155 |
#* **************************************** |
|
| 156 |
#* ***** `Handle Spline Segments` ***** |
|
| 157 |
#* **************************************** |
|
| 158 | ||
| 159 |
#' Build the auxiliary smooth formula for spline changepoint segments
#'
#' @param splineSegments integer indices of `formulae` entries carrying a splineVar
#' @param useGroup logical, should the smooth get a `by = group` term?
#' @param group grouping variable name
#' @param nTimes number of times in the data; caps the knot count when small
#' @param formulae list of per-segment formula pieces
#' @param x name of the x (time) variable
#' @return a character formula string, or NULL when no segment uses a spline
#' @keywords internal
#' @noRd
.handleSplineSegments <- function(splineSegments, useGroup, group, nTimes, formulae, x) {
  if (length(splineSegments) == 0) {
    return(NULL)
  }
  # Separator/by term for the smooth. Without a group a bare comma is kept so
  # any k argument still follows a separator.
  # NOTE(review): with no group this yields "s(time,)" (or "s(time,, k = n)");
  # verify downstream formula handling accepts the empty argument.
  by_term <- if (useGroup) paste0(", by = ", group) else ","
  # With few distinct times, cap the basis dimension so the smooth is
  # estimable.
  k_term <- if (nTimes < 11) paste0(", k = ", nTimes) else NULL
  # Collect the spline coefficient names from each spline segment.
  spline_vars <- vapply(
    splineSegments,
    function(seg_idx) formulae[[seg_idx]]$splineVar,
    character(1)
  )
  lhs_part <- paste0(spline_vars, collapse = "+")
  rhs_part <- paste0("s(", x, by_term, k_term, ")")
  paste0(lhs_part, "~", rhs_part)
}
|
| 187 | ||
| 188 | ||
| 189 |
#* **************************************** |
|
| 190 |
#* ***** `Linear Changepoint Phase` ***** |
|
| 191 |
#* **************************************** |
|
| 192 | ||
| 193 |
#' Linear changepoint section function |
|
| 194 |
#' |
|
| 195 |
#' @param x X variable name |
|
| 196 |
#' @param position Position in growth formula ("1" + "2" + "3"... etc)
|
|
| 197 |
#' @param dpar string or NULL, if string should be the name of the distributional parameter |
|
| 198 |
#' @param priors a list of prior distributions (used for fixed vs estimated changepoints) |
|
| 199 |
#' |
|
| 200 |
#' @examples |
|
| 201 |
#' |
|
| 202 |
#' .linearChngptForm(x = "time", 1) |
|
| 203 |
#' .linearChngptForm(x = "time", 2) |
|
| 204 |
#' .linearChngptForm(x = "time", 3) |
|
| 205 |
#' |
|
| 206 |
#' @return a list with form, cp, and cpInt elements. "form" is the growth formula |
|
| 207 |
#' for this phase of the model. "cp" is the inv_logit function defining when this |
|
| 208 |
#' phase should happen. "cpInt" is the value at the end of this growth phase and is |
|
| 209 |
#' used in starting the next growth phase from the right y value. |
|
| 210 |
#' @noRd |
|
| 211 | ||
| 212 |
.linearChngptForm <- function(x, position = 1, dpar = NULL, priors) { # return f, cp, and cpInt

  # prefix labels parameters for distributional-parameter sub-models;
  # chngptPrefix is the same except it is dropped for fixed (numeric)
  # changepoints, which must appear unprefixed in the formula string.
  prefix <- chngptPrefix <- dpar

  if (any(grepl(paste0("fixedChangePoint", position), names(priors)))) {
    # Fixed changepoint: pull the numeric value out of the prior list and
    # splice it into the formula text directly.
    changePointObj <- as.numeric(priors[[paste0(prefix, "fixedChangePoint", position)]])[1]
    fixed <- TRUE
    chngptPrefix <- NULL # never a prefix if the changepoint is a fixed value, even in sub model
  } else {
    fixed <- FALSE
  }

  if (position == 1) {
    if (!fixed) {
      changePointObj <- "changePoint1"
    }

    # Phase 1 growth: slope * time; cp is a smooth (inv_logit) switch that is
    # ~1 before the changepoint and ~0 after; cpInt is the height reached at
    # the changepoint, used to start the next phase.
    form <- paste0(prefix, "linear", position, "A * ", x)
    cp <- paste0("inv_logit((", chngptPrefix, changePointObj, " - ", x, ") * 5)")
    cpInt <- paste0("(", prefix, "linear", position, "A * ", chngptPrefix, changePointObj, ")")
  } else {
    # NOTE(review): the "*" quantifiers here apply to the preceding character,
    # not the whole word; the pattern still works because it is effectively a
    # substring match on "changePoin".
    all_chngpts <- names(priors)[grepl("fixedChangePoint*|changePoint*", names(priors))]
    # Changepoints strictly before this phase (used to shift time), and the
    # pair spanning this phase boundary (used for the phase-end height).
    prevChangePoints <- all_chngpts[which(as.numeric(sub(".*hangePoint", "", all_chngpts)) < position)]
    prevAndCurrentChangePoints <- all_chngpts[which(
      as.numeric(sub(".*hangePoint", "", all_chngpts)) %in% c(position, position - 1)
    )]
    #* per location where "fixed" is in the prior name, replace the name with that number.
    prev_fixed_index <- which(grepl("fixed", prevChangePoints))
    if (length(prev_fixed_index) > 0) {
      prevChangePoints[prev_fixed_index] <- as.numeric(priors[prevChangePoints[prev_fixed_index]])
    }
    pac_fixed_index <- which(grepl("fixed", prevAndCurrentChangePoints))
    if (length(pac_fixed_index) > 0) {
      prevAndCurrentChangePoints[pac_fixed_index] <- as.numeric(
        priors[prevAndCurrentChangePoints[pac_fixed_index]]
      )
    }

    # Later phases run on time shifted by all earlier changepoints.
    form <- paste0(
      prefix, "linear", position, "A * (", x, "-",
      paste0(prevChangePoints, collapse = "-"), ")"
    )
    cp <- paste0("inv_logit((", x, "-", paste0(prevChangePoints, collapse = "-"), ") * 5)")
    # Height gained during this phase: slope * (current cp - previous cp).
    cpInt <- paste0(
      prefix, "linear", position, "A * (", paste0(rev(prevAndCurrentChangePoints),
        collapse = "-"
      ),
      ")"
    )
    #* cpInt would be wrong for the last position but it isn't used.
  }

  # Parameters to estimate: the slope, plus the changepoint when it is not
  # fixed to a numeric value.
  pars <- paste0(prefix, "linear", position, "A")
  if (!fixed) {
    pars <- c(pars, paste0(chngptPrefix, "changePoint", position))
  }

  return(list(
    "form" = form,
    "cp" = cp,
    "cpInt" = cpInt,
    "params" = pars
  ))
}
|
| 276 | ||
| 277 | ||
| 278 |
#* **************************************** |
|
| 279 |
#* ***** `Logistic Changepoint Phase` ***** |
|
| 280 |
#* **************************************** |
|
| 281 | ||
| 282 |
#' Logistic changepoint section function |
|
| 283 |
#' |
|
| 284 |
#' @param x X variable name |
|
| 285 |
#' @param position Position in growth formula ("1" + "2" + "3"... etc)
|
|
| 286 |
#' @param dpar string or NULL, if string should be the name of the distributional parameter |
|
| 287 |
#' @param priors a list of prior distributions (used for fixed vs estimated changepoints) |
|
| 288 |
#' |
|
| 289 |
#' @examples |
|
| 290 |
#' |
|
| 291 |
#' .logisticChngptForm(x = "time", 1) |
|
| 292 |
#' .logisticChngptForm(x = "time", 2) |
|
| 293 |
#' .logisticChngptForm(x = "time", 3) |
|
| 294 |
#' |
|
| 295 |
#' @return a list with form, cp, and cpInt elements. "form" is the growth formula |
|
| 296 |
#' for this phase of the model. "cp" is the inv_logit function defining when this |
|
| 297 |
#' phase should happen. "cpInt" is the value at the end of this growth phase and is |
|
| 298 |
#' used in starting the next growth phase from the right y value. |
|
| 299 |
#' |
|
| 300 |
#' @noRd |
|
| 301 | ||
| 302 |
.logisticChngptForm <- function(x, position = 1, dpar = NULL, priors) { # return f, cp, and cpInt

  # prefix labels parameters for distributional-parameter sub-models;
  # chngptPrefix is dropped for fixed (numeric) changepoints.
  prefix <- chngptPrefix <- dpar

  if (any(grepl(paste0("fixedChangePoint", position), names(priors)))) {
    # Fixed changepoint: splice the numeric prior value into the formula text.
    changePointObj <- as.numeric(priors[[paste0(prefix, "fixedChangePoint", position)]])[1]
    fixed <- TRUE
    chngptPrefix <- NULL # never a prefix if the changepoint is a fixed value
  } else {
    fixed <- FALSE
  }

  if (position == 1) {
    if (!fixed) {
      changePointObj <- "changePoint1"
    }

    # Three-parameter logistic A / (1 + exp((B - x)/C)); cp is the smooth
    # inv_logit switch keeping this phase active before the changepoint;
    # cpInt is the logistic evaluated at the changepoint (phase-end height).
    form <- paste0(
      prefix, "logistic", position, "A / (1 + exp( (", prefix, "logistic", position,
      "B-(", x, "))/", prefix, "logistic", position, "C) )"
    )
    cp <- paste0("inv_logit((", chngptPrefix, changePointObj, " - ", x, ") * 5)")
    cpInt <- paste0(
      prefix, "logistic", position, "A / (1 + exp( (", prefix, "logistic", position,
      "B-(", chngptPrefix, changePointObj, "))/", prefix, "logistic", position, "C) )"
    )
  } else {
    # Collect changepoint prior names; fixed ones are replaced below by their
    # numeric values so the formula text contains literals instead of names.
    all_chngpts <- names(priors)[grepl("fixedChangePoint*|changePoint*", names(priors))]
    prevChangePoints <- all_chngpts[which(as.numeric(sub(".*hangePoint", "", all_chngpts)) < position)]
    prevAndCurrentChangePoints <- all_chngpts[which(
      as.numeric(sub(".*hangePoint", "", all_chngpts)) %in% c(position, position - 1)
    )]
    #* per location where "fixed" is in the prior name, replace the name with that number.
    prev_fixed_index <- which(grepl("fixed", prevChangePoints))
    if (length(prev_fixed_index) > 0) {
      prevChangePoints[prev_fixed_index] <- as.numeric(priors[prevChangePoints[prev_fixed_index]])
    }
    pac_fixed_index <- which(grepl("fixed", prevAndCurrentChangePoints))
    if (length(pac_fixed_index) > 0) {
      prevAndCurrentChangePoints[pac_fixed_index] <- as.numeric(
        priors[prevAndCurrentChangePoints[pac_fixed_index]]
      )
    }

    # Later phases run on time shifted by all earlier changepoints; the shift
    # is parenthesized inside the B-(...) group.
    form <- paste0(
      prefix, "logistic", position, "A / (1 + exp( (", prefix, "logistic", position,
      "B-(", x, "-", paste0(prevChangePoints, collapse = "-"),
      "))/", prefix, "logistic", position, "C) )"
    )
    cp <- paste0("inv_logit((", x, "-", paste0(prevChangePoints, collapse = "-"), ") * 5)")
    cpInt <- paste0(
      prefix, "logistic", position, "A / (1 + exp( (", prefix, "logistic", position,
      "B-(", paste0(rev(prevAndCurrentChangePoints), collapse = "-"), "))/", prefix, "logistic",
      position, "C) )"
    )
  }
  # Parameters to estimate: A, B, C plus the changepoint when not fixed.
  pars <- paste0(prefix, "logistic", position, c("A", "B", "C"))
  if (!fixed) {
    pars <- c(pars, paste0(chngptPrefix, "changePoint", position))
  }
  return(list(
    "form" = form,
    "cp" = cp,
    "cpInt" = cpInt,
    "params" = pars
  ))
}
|
| 369 | ||
| 370 | ||
| 371 |
#* **************************************** |
|
| 372 |
#* ***** `Gompertz Changepoint Phase` ***** |
|
| 373 |
#* **************************************** |
|
| 374 | ||
| 375 |
#' Gompertz changepoint section function |
|
| 376 |
#' |
|
| 377 |
#' @param x X variable name |
|
| 378 |
#' @param position Position in growth formula ("1" + "2" + "3"... etc)
|
|
| 379 |
#' @param dpar string or NULL, if string should be the name of the distributional parameter |
|
| 380 |
#' @param priors a list of prior distributions (used for fixed vs estimated changepoints) |
|
| 381 |
#' |
|
| 382 |
#' @examples |
|
| 383 |
#' |
|
| 384 |
#' .gompertzChngptForm(x = "time", 1) |
|
| 385 |
#' .gompertzChngptForm(x = "time", 2) |
|
| 386 |
#' .gompertzChngptForm(x = "time", 3) |
|
| 387 |
#' |
|
| 388 |
#' @return a list with form, cp, and cpInt elements. "form" is the growth formula |
|
| 389 |
#' for this phase of the model. "cp" is the inv_logit function defining when this |
|
| 390 |
#' phase should happen. "cpInt" is the value at the end of this growth phase and is |
|
| 391 |
#' used in starting the next growth phase from the right y value. |
|
| 392 |
#' @noRd |
|
| 393 | ||
| 394 |
.gompertzChngptForm <- function(x, position = 1, dpar = NULL, priors) { # return f, cp, and cpInt

  # prefix labels parameters for distributional-parameter sub-models;
  # chngptPrefix is dropped for fixed (numeric) changepoints so the literal
  # value is spliced into the formula unprefixed.
  prefix <- chngptPrefix <- dpar

  if (any(grepl(paste0("fixedChangePoint", position), names(priors)))) {
    # Fixed changepoint: take the numeric prior value directly.
    changePointObj <- as.numeric(priors[[paste0(prefix, "fixedChangePoint", position)]])[1]
    fixed <- TRUE
    chngptPrefix <- NULL # never a prefix if the changepoint is a fixed value
  } else {
    fixed <- FALSE
  }

  if (position == 1) {
    if (!fixed) {
      changePointObj <- "changePoint1"
    }
    # Gompertz A * exp(-B * exp(-C * x)); cp gates the phase before the
    # changepoint; cpInt is the height reached at the changepoint.
    form <- paste0(
      prefix, "gompertz", position, "A * exp(-", prefix, "gompertz", position,
      "B * exp(-", prefix, "gompertz", position, "C * ", x, "))"
    )
    # Fix: use chngptPrefix (not prefix), matching the other *ChngptForm
    # helpers -- with `prefix` a fixed changepoint in a dpar sub-model was
    # rendered as e.g. "sigma10" instead of "10".
    cp <- paste0("inv_logit((", chngptPrefix, changePointObj, " - ", x, ") * 5)")
    cpInt <- paste0(
      prefix, "gompertz", position, "A * exp(-", prefix, "gompertz", position,
      "B * exp(-", prefix, "gompertz", position, "C * ", chngptPrefix, changePointObj,
      "))"
    )
  } else {
    # Collect changepoint prior names; fixed ones are replaced below by their
    # numeric values.
    all_chngpts <- names(priors)[grepl("fixedChangePoint*|changePoint*", names(priors))]
    prevChangePoints <- all_chngpts[which(as.numeric(sub(".*hangePoint", "", all_chngpts)) < position)]
    prevAndCurrentChangePoints <- all_chngpts[which(
      as.numeric(sub(".*hangePoint", "", all_chngpts)) %in% c(position, position - 1)
    )]
    #* per location where "fixed" is in the prior name, replace the name with that number.
    prev_fixed_index <- which(grepl("fixed", prevChangePoints))
    if (length(prev_fixed_index) > 0) {
      prevChangePoints[prev_fixed_index] <- as.numeric(priors[prevChangePoints[prev_fixed_index]])
    }
    pac_fixed_index <- which(grepl("fixed", prevAndCurrentChangePoints))
    if (length(pac_fixed_index) > 0) {
      prevAndCurrentChangePoints[pac_fixed_index] <- as.numeric(
        priors[prevAndCurrentChangePoints[pac_fixed_index]]
      )
    }

    # Later phases run on time shifted by all earlier changepoints.
    form <- paste0(
      prefix, "gompertz", position, "A * exp(-", prefix, "gompertz", position,
      "B * exp(-", prefix, "gompertz", position, "C * (", x, " - ",
      paste0(prevChangePoints, collapse = "-"), ")))"
    )

    cp <- paste0("inv_logit((", x, "-", paste0(prevChangePoints, collapse = "-"), ") * 5)")
    # Fix: three "(" are opened here (two exp() calls plus the shifted-time
    # group after "C * ("), so three ")" are required; the original closed
    # only two, producing an unparseable cpInt for phases after the first.
    cpInt <- paste0(
      prefix, "gompertz", position, "A * exp(-", prefix, "gompertz", position, "B * exp(-", prefix,
      "gompertz", position,
      "C * (", paste0(rev(prevAndCurrentChangePoints), collapse = "-"), ")))"
    )
  }
  # Parameters to estimate: A, B, C plus the changepoint when not fixed.
  pars <- paste0(prefix, "gompertz", position, c("A", "B", "C"))
  if (!fixed) {
    pars <- c(pars, paste0(chngptPrefix, "changePoint", position))
  }
  return(list(
    "form" = form,
    "cp" = cp,
    "cpInt" = cpInt,
    "params" = pars
  ))
}
|
| 462 | ||
| 463 |
#* **************************************** |
|
| 464 |
#* ***** `monomolecular Changepoint Phase` ***** |
|
| 465 |
#* **************************************** |
|
| 466 | ||
| 467 |
#' Monomolecular changepoint section function |
|
| 468 |
#' |
|
| 469 |
#' @param x X variable name |
|
| 470 |
#' @param position Position in growth formula ("1" + "2" + "3"... etc)
|
|
| 471 |
#' @param dpar string or NULL, if string should be the name of the distributional parameter |
|
| 472 |
#' @param priors a list of prior distributions (used for fixed vs estimated changepoints) |
|
| 473 |
#' |
|
| 474 |
#' @examples |
|
| 475 |
#' |
|
| 476 |
#' .monomolecularChngptForm(x = "time", 1) |
|
| 477 |
#' .monomolecularChngptForm(x = "time", 2) |
|
| 478 |
#' .monomolecularChngptForm(x = "time", 3) |
|
| 479 |
#' |
|
| 480 |
#' @return a list with form, cp, and cpInt elements. "form" is the growth formula |
|
| 481 |
#' for this phase of the model. "cp" is the inv_logit function defining when this |
|
| 482 |
#' phase should happen. "cpInt" is the value at the end of this growth phase and is |
|
| 483 |
#' used in starting the next growth phase from the right y value. |
|
| 484 |
#' |
|
| 485 |
#' @noRd |
|
| 486 | ||
| 487 |
.monomolecularChngptForm <- function(x, position = 1, dpar = NULL, priors) { # return f, cp, and cpInt

  # prefix labels parameters for distributional-parameter sub-models;
  # chngptPrefix is dropped for fixed (numeric) changepoints.
  prefix <- chngptPrefix <- dpar

  if (any(grepl(paste0("fixedChangePoint", position), names(priors)))) {
    # Fixed changepoint: take the numeric prior value directly.
    changePointObj <- as.numeric(priors[[paste0(prefix, "fixedChangePoint", position)]])[1]
    fixed <- TRUE
    chngptPrefix <- NULL # never a prefix if the changepoint is a fixed value
  } else {
    fixed <- FALSE
  }

  if (position == 1) {
    if (!fixed) {
      changePointObj <- "changePoint1"
    }
    # Monomolecular A - A * exp(-B * x); cp gates the phase before the
    # changepoint; cpInt is the height reached at the changepoint.
    form <- paste0(
      prefix, "monomolecular", position, "A-", prefix, "monomolecular", position,
      "A * exp(-", prefix, "monomolecular", position, "B * ", x, ")"
    )
    cp <- paste0("inv_logit((", chngptPrefix, changePointObj, " - ", x, ") * 5)")
    cpInt <- paste0(
      prefix, "monomolecular", position, "A-", prefix, "monomolecular", position,
      "A * exp(-", prefix, "monomolecular", position, "B * ", chngptPrefix,
      changePointObj, ")"
    )
  } else {
    # Collect changepoint prior names; fixed ones are replaced below by their
    # numeric values.
    all_chngpts <- names(priors)[grepl("fixedChangePoint*|changePoint*", names(priors))]
    prevChangePoints <- all_chngpts[which(as.numeric(sub(".*hangePoint", "", all_chngpts)) < position)]
    prevAndCurrentChangePoints <- all_chngpts[which(
      as.numeric(sub(".*hangePoint", "", all_chngpts)) %in% c(position, position - 1)
    )]
    #* per location where "fixed" is in the prior name, replace the name with that number.
    prev_fixed_index <- which(grepl("fixed", prevChangePoints))
    if (length(prev_fixed_index) > 0) {
      prevChangePoints[prev_fixed_index] <- as.numeric(priors[prevChangePoints[prev_fixed_index]])
    }
    pac_fixed_index <- which(grepl("fixed", prevAndCurrentChangePoints))
    if (length(pac_fixed_index) > 0) {
      prevAndCurrentChangePoints[pac_fixed_index] <- as.numeric(
        priors[prevAndCurrentChangePoints[pac_fixed_index]]
      )
    }

    # Fix: parenthesize the shifted time so the exponent is -B * (x - cp),
    # matching the linear/logistic/gompertz helpers. The original emitted
    # "exp(-B * x-cp)", which evaluates as exp((-B * x) - cp) due to
    # operator precedence -- a different (wrong) time shift.
    form <- paste0(
      prefix, "monomolecular", position, "A-", prefix, "monomolecular", position, "A * exp(-",
      prefix, "monomolecular", position, "B * (", x, "-",
      paste0(prevChangePoints, collapse = "-"), "))"
    )
    cp <- paste0("inv_logit((", x, "-", paste0(prevChangePoints, collapse = "-"), ") * 5)")
    # Same precedence fix for the phase-end height.
    cpInt <- paste0(
      prefix, "monomolecular", position, "A-", prefix, "monomolecular", position, "A * exp(-",
      prefix, "monomolecular", position, "B * (",
      paste0(rev(prevAndCurrentChangePoints), collapse = "-"), "))"
    )
  }
  # Parameters to estimate: A, B plus the changepoint when not fixed.
  pars <- paste0(prefix, "monomolecular", position, c("A", "B"))
  if (!fixed) {
    pars <- c(pars, paste0(chngptPrefix, "changePoint", position))
  }
  return(list(
    "form" = form,
    "cp" = cp,
    "cpInt" = cpInt,
    "params" = pars
  ))
}
|
| 554 | ||
| 555 | ||
| 556 |
#* ****************************************
#* ***** `Exponential Changepoint Phase` *****
#* ****************************************

#' Exponential changepoint section function
#'
#' @param x X variable name
#' @param position Position in growth formula ("1" + "2" + "3"... etc)
#' @param dpar string or NULL, if string should be the name of the distributional parameter
#' @param priors a list of prior distributions (used for fixed vs estimated changepoints)
#'
#' @examples
#'
#' .exponentialChngptForm(x = "time", 1)
#' .exponentialChngptForm(x = "time", 2)
#' .exponentialChngptForm(x = "time", 3)
#'
#' @return a list with form, cp, and cpInt elements. "form" is the growth formula
#' for this phase of the model. "cp" is the inv_logit function defining when this
#' phase should happen. "cpInt" is the value at the end of this growth phase and is
#' used in starting the next growth phase from the right y value.
#' @noRd

.exponentialChngptForm <- function(x, position = 1, dpar = NULL, priors) {
  prefix <- chngptPrefix <- dpar
  stem <- paste0(prefix, "exponential", position) # shared parameter-name stem
  #* a prior named "[dpar]fixedChangePoint<position>" marks this phase's
  #* changepoint as a fixed numeric value rather than an estimated parameter.
  fixed <- any(grepl(paste0("fixedChangePoint", position), names(priors)))
  if (fixed) {
    changePointObj <- as.numeric(priors[[paste0(prefix, "fixedChangePoint", position)]])[1]
    chngptPrefix <- NULL # fixed values never carry a dpar prefix
  }
  if (position == 1) {
    if (!fixed) {
      changePointObj <- "changePoint1"
    }
    form <- paste0(stem, "A * exp(", stem, "B * ", x, ")")
    cp <- paste0("inv_logit((", chngptPrefix, changePointObj, " - ", x, ") * 5)")
    cpInt <- paste0(stem, "A * exp(", stem, "B * ", chngptPrefix, changePointObj, ")")
  } else {
    #* later phases run on x shifted by the sum of all earlier changepoints.
    all_chngpts <- names(priors)[grepl("fixedChangePoint*|changePoint*", names(priors))]
    chngpt_num <- as.numeric(sub(".*hangePoint", "", all_chngpts))
    prevChangePoints <- all_chngpts[which(chngpt_num < position)]
    prevAndCurrentChangePoints <- all_chngpts[which(chngpt_num %in% c(position, position - 1))]
    #* where a changepoint is fixed, substitute its numeric value for its name.
    prev_fixed_index <- which(grepl("fixed", prevChangePoints))
    if (length(prev_fixed_index) > 0) {
      prevChangePoints[prev_fixed_index] <- as.numeric(priors[prevChangePoints[prev_fixed_index]])
    }
    pac_fixed_index <- which(grepl("fixed", prevAndCurrentChangePoints))
    if (length(pac_fixed_index) > 0) {
      prevAndCurrentChangePoints[pac_fixed_index] <- as.numeric(
        priors[prevAndCurrentChangePoints[pac_fixed_index]]
      )
    }
    offset <- paste0(prevChangePoints, collapse = "-")
    form <- paste0(stem, "A * exp(", stem, "B * (", x, "-", offset, "))")
    cp <- paste0("inv_logit((", x, "-", offset, ") * 5)")
    cpInt <- paste0(
      stem, "A * exp(", stem, "B * (",
      paste0(rev(prevAndCurrentChangePoints), collapse = "-"), "))"
    )
  }
  pars <- paste0(stem, c("A", "B"))
  if (!fixed) {
    pars <- c(pars, paste0(chngptPrefix, "changePoint", position))
  }
  return(list(
    "form" = form,
    "cp" = cp,
    "cpInt" = cpInt,
    "params" = pars
  ))
}
|
| 644 | ||
| 645 |
#* ****************************************
#* ***** `Power Law Changepoint Phase` *****
#* ****************************************

#' Power Law changepoint section function
#'
#' @param x X variable name
#' @param position Position in growth formula ("1" + "2" + "3"... etc)
#' @param dpar string or NULL, if string should be the name of the distributional parameter
#' @param priors a list of prior distributions (used for fixed vs estimated changepoints)
#'
#' @examples
#'
#' .powerlawChngptForm(x = "time", 1)
#' .powerlawChngptForm(x = "time", 2)
#' .powerlawChngptForm(x = "time", 3)
#'
#' @return a list with form, cp, and cpInt elements. "form" is the growth formula
#' for this phase of the model. "cp" is the inv_logit function defining when this
#' phase should happen. "cpInt" is the value at the end of this growth phase and is
#' used in starting the next growth phase from the right y value.
#'
#' @noRd

.powerlawChngptForm <- function(x, position = 1, dpar = NULL, priors) { # return f, cp, and cpInt
  prefix <- chngptPrefix <- dpar
  # a "fixedChangePoint<position>" prior means this phase's changepoint is a
  # constant rather than an estimated parameter.
  if (any(grepl(paste0("fixedChangePoint", position), names(priors)))) {
    changePointObj <- as.numeric(priors[[paste0(prefix, "fixedChangePoint", position)]])[1]
    fixed <- TRUE
    chngptPrefix <- NULL # never a prefix if the changepoint is a fixed value
  } else {
    fixed <- FALSE
  }
  if (position == 1) {
    if (!fixed) {
      changePointObj <- "changePoint1"
    }
    form <- paste0(prefix, "powerLaw", position, "A * ", x, "^(", prefix, "powerLaw", position, "B)")
    cp <- paste0("inv_logit((", chngptPrefix, changePointObj, " - ", x, ") * 5)")
    cpInt <- paste0(
      prefix, "powerLaw", position, "A * ", chngptPrefix, changePointObj, "^(", prefix,
      "powerLaw", position, "B)"
    )
  } else {
    all_chngpts <- names(priors)[grepl("fixedChangePoint*|changePoint*", names(priors))]
    prevChangePoints <- all_chngpts[which(as.numeric(sub(".*hangePoint", "", all_chngpts)) < position)]
    prevAndCurrentChangePoints <- all_chngpts[which(
      as.numeric(sub(".*hangePoint", "", all_chngpts)) %in% c(position, position - 1)
    )]
    #* per location where "fixed" is in the prior name, replace the name with that number.
    prev_fixed_index <- which(grepl("fixed", prevChangePoints))
    if (length(prev_fixed_index) > 0) {
      prevChangePoints[prev_fixed_index] <- as.numeric(priors[prevChangePoints[prev_fixed_index]])
    }
    pac_fixed_index <- which(grepl("fixed", prevAndCurrentChangePoints))
    if (length(pac_fixed_index) > 0) {
      prevAndCurrentChangePoints[pac_fixed_index] <- as.numeric(
        priors[prevAndCurrentChangePoints[pac_fixed_index]]
      )
    }
    #* Bug fix: the shifted x must be parenthesized before exponentiation.
    #* The previous form produced "A * x-cp^(B)", which parses as x - (cp^B)
    #* because "^" binds tighter than "-". cpInt below already used the
    #* parenthesized form, so "form" now matches it.
    form <- paste0(
      prefix, "powerLaw", position, "A * (", x, "-",
      paste0(prevChangePoints, collapse = "-"), ")^(", prefix, "powerLaw", position, "B)"
    )
    cp <- paste0("inv_logit((", x, "-", paste0(prevChangePoints, collapse = "-"), ") * 5)")
    cpInt <- paste0(
      prefix, "powerLaw", position, "A * (", paste0(rev(prevAndCurrentChangePoints), collapse = "-"),
      ")^(", prefix, "powerLaw", position, "B)"
    )
  }
  pars <- paste0(prefix, "powerLaw", position, c("A", "B"))
  if (!fixed) {
    pars <- c(pars, paste0(chngptPrefix, "changePoint", position))
  }
  return(list(
    "form" = form,
    "cp" = cp,
    "cpInt" = cpInt,
    "params" = pars
  ))
}
|
| 730 | ||
| 731 |
#* ****************************************
#* ***** `Logarithmic Changepoint Phase` *****
#* ****************************************

#' Logarithmic changepoint section function
#'
#' @param x X variable name
#' @param position Position in growth formula ("1" + "2" + "3"... etc)
#' @param dpar string or NULL, if string should be the name of the distributional parameter
#' @param priors a list of prior distributions (used for fixed vs estimated changepoints)
#'
#' @examples
#'
#' .logarithmicChngptForm(x = "time", 1)
#' .logarithmicChngptForm(x = "time", 2)
#' .logarithmicChngptForm(x = "time", 3)
#'
#' @return a list with form, cp, and cpInt elements. "form" is the growth formula
#' for this phase of the model. "cp" is the inv_logit function defining when this
#' phase should happen. "cpInt" is the value at the end of this growth phase and is
#' used in starting the next growth phase from the right y value.
#'
#' @noRd

.logarithmicChngptForm <- function(x, position = 1, dpar = NULL, priors) { # return f, cp, and cpInt
  prefix <- chngptPrefix <- dpar
  # a "fixedChangePoint<position>" prior means this phase's changepoint is a
  # constant rather than an estimated parameter.
  if (any(grepl(paste0("fixedChangePoint", position), names(priors)))) {
    changePointObj <- as.numeric(priors[[paste0(prefix, "fixedChangePoint", position)]])[1]
    fixed <- TRUE
    chngptPrefix <- NULL # never a prefix if the changepoint is a fixed value
  } else {
    fixed <- FALSE
  }
  if (position == 1) {
    if (!fixed) {
      changePointObj <- "changePoint1"
    }
    form <- paste0(prefix, "logarithmic", position, "A * log(", x, ")")
    cp <- paste0("inv_logit((", chngptPrefix, changePointObj, " - ", x, ") * 5)")
    cpInt <- paste0(
      prefix, "logarithmic", position, "A * log(", chngptPrefix, changePointObj, ")"
    )
  } else {
    all_chngpts <- names(priors)[grepl("fixedChangePoint*|changePoint*", names(priors))]
    prevChangePoints <- all_chngpts[which(as.numeric(sub(".*hangePoint", "", all_chngpts)) < position)]
    prevAndCurrentChangePoints <- all_chngpts[which(
      as.numeric(sub(".*hangePoint", "", all_chngpts)) %in% c(position, position - 1)
    )]
    #* per location where "fixed" is in the prior name, replace the name with that number.
    prev_fixed_index <- which(grepl("fixed", prevChangePoints))
    if (length(prev_fixed_index) > 0) {
      prevChangePoints[prev_fixed_index] <- as.numeric(priors[prevChangePoints[prev_fixed_index]])
    }
    pac_fixed_index <- which(grepl("fixed", prevAndCurrentChangePoints))
    if (length(pac_fixed_index) > 0) {
      prevAndCurrentChangePoints[pac_fixed_index] <- as.numeric(
        priors[prevAndCurrentChangePoints[pac_fixed_index]]
      )
    }
    form <- paste0(
      prefix, "logarithmic", position, "A * log(", x, "-",
      paste0(prevChangePoints, collapse = "-"), ")"
    )
    cp <- paste0("inv_logit((", x, "-", paste0(prevChangePoints, collapse = "-"), ") * 5)")
    cpInt <- paste0(
      prefix, "logarithmic", position, "A * log(",
      paste0(rev(prevAndCurrentChangePoints), collapse = "-"),
      ")"
    )
  }
  #* Bug fix: the parameter stem was "powerLaw" (a copy/paste slip from
  #* .powerlawChngptForm), so the declared parameter "powerLawNA" never matched
  #* the "logarithmicNA" name used in the formula above.
  pars <- paste0(prefix, "logarithmic", position, "A")
  if (!fixed) {
    pars <- c(pars, paste0(chngptPrefix, "changePoint", position))
  }
  return(list(
    "form" = form,
    "cp" = cp,
    "cpInt" = cpInt,
    "params" = pars
  ))
}
|
| 816 | ||
| 817 |
#* ****************************************
#* ***** `Intercept Changepoint Phase` *****
#* ****************************************

#' intercept only changepoint section function
#'
#' @param x X variable name
#' @param position Position in growth formula ("1" + "2" + "3"... etc)
#' @param dpar string or NULL, if string should be the name of the distributional parameter
#' @param priors a list of prior distributions (used for fixed vs estimated changepoints)
#'
#' @examples
#'
#' .intChngptForm(x = "time", 1)
#' .intChngptForm(x = "time", 2)
#' .intChngptForm(x = "time", 3)
#'
#' @return a list with form, cp, and cpInt elements. "form" is the growth formula
#' for this phase of the model (a single intercept parameter). "cp" is the
#' inv_logit function defining when this phase should happen. "cpInt" is the value
#' at the end of this growth phase (the intercept itself) and is used in starting
#' the next growth phase from the right y value.
#' @noRd

.intChngptForm <- function(x, position = 1, dpar = NULL, priors) {
  prefix <- chngptPrefix <- dpar
  intPar <- paste0(prefix, "int", position) # the only growth parameter here
  #* a "fixedChangePoint<position>" prior marks a fixed (non-estimated) changepoint.
  fixed <- any(grepl(paste0("fixedChangePoint", position), names(priors)))
  if (fixed) {
    changePointObj <- as.numeric(priors[[paste0(prefix, "fixedChangePoint", position)]])[1]
    chngptPrefix <- NULL # fixed values never carry a dpar prefix
  }
  if (position == 1) {
    if (!fixed) {
      changePointObj <- "changePoint1"
    }
    cp <- paste0("inv_logit((", chngptPrefix, changePointObj, " - ", x, ") * 5)")
  } else {
    #* later phases switch on at x shifted by the sum of earlier changepoints.
    all_chngpts <- names(priors)[grepl("fixedChangePoint*|changePoint*", names(priors))]
    prevChangePoints <- all_chngpts[which(as.numeric(sub(".*hangePoint", "", all_chngpts)) < position)]
    #* fixed changepoints are replaced by their numeric values.
    prev_fixed_index <- which(grepl("fixed", prevChangePoints))
    if (length(prev_fixed_index) > 0) {
      prevChangePoints[prev_fixed_index] <- as.numeric(priors[prevChangePoints[prev_fixed_index]])
    }
    cp <- paste0("inv_logit((", x, "-", paste0(prevChangePoints, collapse = "-"), ") * 5)")
  }
  #* for an intercept phase the formula and its end value are both the intercept.
  form <- intPar
  cpInt <- intPar
  pars <- intPar
  if (!fixed) {
    pars <- c(pars, paste0(chngptPrefix, "changePoint", position))
  }
  return(list(
    "form" = form,
    "cp" = cp,
    "cpInt" = cpInt,
    "params" = pars
  ))
}
|
| 886 | ||
| 887 | ||
| 888 | ||
| 889 | ||
| 890 |
#* ****************************************
#* ***** `Gam Changepoint Phase` *****
#* ****************************************

#' gam changepoint section function
#'
#' @param x X variable name
#' @param position Position in growth formula ("1" + "2" + "3"... etc)
#' @param dpar string or NULL, if string should be the name of the distributional parameter
#' @param priors a list of prior distributions (used for fixed vs estimated changepoints)
#'
#' @examples
#'
#' .gamChngptForm(x = "time", 2)
#' .gamChngptForm(x = "time", 3)
#'
#' @return a list with form, cp, cpInt, and splineVar elements. "form" is the growth
#' formula for this phase of the model. "cp" is the inv_logit function defining when
#' this phase should happen. "cpInt" is NA here: the end value of a GAM phase is
#' undefined, so GAMs should only be used at the end of a segmented model.
#' "splineVar" is the variable name to use in making a spline for a predictor.
#' @noRd

.gamChngptForm <- function(x, position = 1, dpar = NULL, priors) {
  prefix <- chngptPrefix <- dpar
  #* a "fixedChangePoint<position>" prior marks a fixed (non-estimated) changepoint.
  fixed <- any(grepl(paste0("fixedChangePoint", position), names(priors)))
  if (fixed) {
    chngptPrefix <- NULL # fixed values never carry a dpar prefix
  }
  #* a spline has no defined end value, so it cannot start a segmented model.
  if (position == 1) {
    stop("GAMs are only supported as the last function of a multi-part formula")
  }
  all_chngpts <- names(priors)[grepl("fixedChangePoint*|changePoint*", names(priors))]
  prevChangePoints <- all_chngpts[which(as.numeric(sub(".*hangePoint", "", all_chngpts)) < position)]
  #* fixed changepoints are replaced by their numeric values.
  prev_fixed_index <- which(grepl("fixed", prevChangePoints))
  if (length(prev_fixed_index) > 0) {
    prevChangePoints[prev_fixed_index] <- as.numeric(priors[prevChangePoints[prev_fixed_index]])
  }
  splinePar <- paste0(prefix, "spline")
  cp <- paste0("inv_logit((", x, "-", paste0(prevChangePoints, collapse = "-"), ") * 5)")
  pars <- splinePar
  if (!fixed) {
    pars <- c(pars, paste0(chngptPrefix, "changePoint", position))
  }
  return(list(
    "form" = splinePar,
    "cp" = cp,
    "cpInt" = NA, # undefined for a spline phase
    "params" = pars,
    "splineVar" = splinePar
  ))
}
|
| 952 | ||
| 953 |
#* ****************************************
#* ***** `Decay Changepoint Phase` *****
#* ****************************************

#' flip any model to a decay model
#'
#' @param phaseList A list returned from some *ChngptForm function
#'
#' @return a list with form, cp, cpInt and params for a decay segment to a model
#' @noRd

.decayChngptForm <- function(phaseList) {
  # Negate the growth formula so this phase decreases instead of increases.
  # NOTE(review): the "-" is prepended without wrapping parentheses, so for a
  # multi-term form (e.g. "A-A*exp(...)") only the first term is negated —
  # confirm this is the intended behavior for those models.
  phaseList$form <- paste0("-", phaseList$form)
  phaseList
}
|
| 968 | ||
| 969 |
#* ****************************************
#* ***** `Weibull Changepoint Phase` *****
#* ****************************************

#' Weibull changepoint section function
#'
#' @param x X variable name
#' @param position Position in growth formula ("1" + "2" + "3"... etc)
#' @param dpar string or NULL, if string should be the name of the distributional parameter
#' @param priors a list of prior distributions (used for fixed vs estimated changepoints)
#'
#' @examples
#'
#' .weibullChngptForm(x = "time", 1)
#' .weibullChngptForm(x = "time", 2)
#' .weibullChngptForm(x = "time", 3)
#'
#' @return a list with form, cp, and cpInt elements. "form" is the growth formula
#' for this phase of the model. "cp" is the inv_logit function defining when this
#' phase should happen. "cpInt" is the value at the end of this growth phase and is
#' used in starting the next growth phase from the right y value.
#'
#' @noRd

.weibullChngptForm <- function(x, position = 1, dpar = NULL, priors) { # return f, cp, and cpInt
  prefix <- chngptPrefix <- dpar
  # a "fixedChangePoint<position>" prior means this phase's changepoint is a
  # constant rather than an estimated parameter.
  if (any(grepl(paste0("fixedChangePoint", position), names(priors)))) {
    changePointObj <- as.numeric(priors[[paste0(prefix, "fixedChangePoint", position)]])[1]
    fixed <- TRUE
    chngptPrefix <- NULL # never a prefix if the changepoint is a fixed value
  } else {
    fixed <- FALSE
  }
  if (position == 1) {
    if (!fixed) {
      changePointObj <- "changePoint1"
    }
    form <- paste0(
      prefix, "weibull", position, "A * (1-exp(-(", x, "/", prefix,
      "weibull", position, "C)^", prefix, "weibull", position, "B))"
    )
    cp <- paste0("inv_logit((", chngptPrefix, changePointObj, " - ", x, ") * 5)")
    cpInt <- paste0(
      prefix, "weibull", position, "A * (1-exp(-(", chngptPrefix, changePointObj,
      "/", prefix, "weibull", position, "C)^", prefix, "weibull", position, "B))"
    )
  } else {
    all_chngpts <- names(priors)[grepl("fixedChangePoint*|changePoint*", names(priors))]
    prevChangePoints <- all_chngpts[which(as.numeric(sub(".*hangePoint", "", all_chngpts)) < position)]
    prevAndCurrentChangePoints <- all_chngpts[which(
      as.numeric(sub(".*hangePoint", "", all_chngpts)) %in% c(position, position - 1)
    )]
    #* per location where "fixed" is in the prior name, replace the name with that number.
    prev_fixed_index <- which(grepl("fixed", prevChangePoints))
    if (length(prev_fixed_index) > 0) {
      prevChangePoints[prev_fixed_index] <- as.numeric(priors[prevChangePoints[prev_fixed_index]])
    }
    pac_fixed_index <- which(grepl("fixed", prevAndCurrentChangePoints))
    if (length(pac_fixed_index) > 0) {
      prevAndCurrentChangePoints[pac_fixed_index] <- as.numeric(
        priors[prevAndCurrentChangePoints[pac_fixed_index]]
      )
    }
    #* Bug fix: the shifted x must be parenthesized before the division by C.
    #* The previous form produced "(x-cp/C)", which parses as x - (cp/C)
    #* because "/" binds tighter than "-"; intended is "((x-cp)/C)", matching
    #* the position-1 branch's "(x/C)" and .frechetChngptForm's grouping.
    form <- paste0(
      prefix, "weibull", position, "A * (1-exp(-((", x, "-", paste0(prevChangePoints, collapse = "-"),
      ")/", prefix, "weibull", position, "C)^", prefix, "weibull", position, "B))"
    )
    cp <- paste0("inv_logit((", x, "-", paste0(prevChangePoints, collapse = "-"), ") * 5)")
    cpInt <- paste0(
      prefix, "weibull", position, "A * (1-exp(-((", paste0(rev(prevAndCurrentChangePoints),
        collapse = "-"
      ),
      ")/", prefix, "weibull", position, "C)^", prefix, "weibull", position, "B))"
    )
  }
  pars <- paste0(prefix, "weibull", position, c("A", "B", "C"))
  if (!fixed) {
    pars <- c(pars, paste0(chngptPrefix, "changePoint", position))
  }
  return(list(
    "form" = form,
    "cp" = cp,
    "cpInt" = cpInt,
    "params" = pars
  ))
}
|
| 1058 | ||
| 1059 | ||
| 1060 |
#* ****************************************
#* ***** `Frechet Changepoint Phase` *****
#* ****************************************

#' Frechet changepoint section function
#'
#' @param x X variable name
#' @param position Position in growth formula ("1" + "2" + "3"... etc)
#' @param dpar string or NULL, if string should be the name of the distributional parameter
#' @param priors a list of prior distributions (used for fixed vs estimated changepoints)
#'
#' @examples
#'
#' .frechetChngptForm(x = "time", 1)
#' .frechetChngptForm(x = "time", 2)
#' .frechetChngptForm(x = "time", 3)
#'
#' @return a list with form, cp, and cpInt elements. "form" is the growth formula
#' for this phase of the model. "cp" is the inv_logit function defining when this
#' phase should happen. "cpInt" is the value at the end of this growth phase and is
#' used in starting the next growth phase from the right y value.
#'
#' @noRd

.frechetChngptForm <- function(x, position = 1, dpar = NULL, priors) {
  prefix <- chngptPrefix <- dpar
  stem <- paste0(prefix, "frechet", position) # shared parameter-name stem
  #* a prior named "[dpar]fixedChangePoint<position>" marks this phase's
  #* changepoint as a fixed numeric value rather than an estimated parameter.
  fixed <- any(grepl(paste0("fixedChangePoint", position), names(priors)))
  if (fixed) {
    changePointObj <- as.numeric(priors[[paste0(prefix, "fixedChangePoint", position)]])[1]
    chngptPrefix <- NULL # fixed values never carry a dpar prefix
  }
  if (position == 1) {
    if (!fixed) {
      changePointObj <- "changePoint1"
    }
    form <- paste0(stem, "A * exp(-((", x, "-0)/", stem, "C)^(-", stem, "B))")
    cp <- paste0("inv_logit((", chngptPrefix, changePointObj, " - ", x, ") * 5)")
    cpInt <- paste0(
      stem, "A * exp(-((", chngptPrefix, changePointObj, "-0)/", stem, "C)^(-", stem, "B))"
    )
  } else {
    #* later phases run on x shifted by the sum of all earlier changepoints.
    all_chngpts <- names(priors)[grepl("fixedChangePoint*|changePoint*", names(priors))]
    chngpt_num <- as.numeric(sub(".*hangePoint", "", all_chngpts))
    prevChangePoints <- all_chngpts[which(chngpt_num < position)]
    prevAndCurrentChangePoints <- all_chngpts[which(chngpt_num %in% c(position, position - 1))]
    #* where a changepoint is fixed, substitute its numeric value for its name.
    prev_fixed_index <- which(grepl("fixed", prevChangePoints))
    if (length(prev_fixed_index) > 0) {
      prevChangePoints[prev_fixed_index] <- as.numeric(priors[prevChangePoints[prev_fixed_index]])
    }
    pac_fixed_index <- which(grepl("fixed", prevAndCurrentChangePoints))
    if (length(pac_fixed_index) > 0) {
      prevAndCurrentChangePoints[pac_fixed_index] <- as.numeric(
        priors[prevAndCurrentChangePoints[pac_fixed_index]]
      )
    }
    offset <- paste0(prevChangePoints, collapse = "-")
    form <- paste0(stem, "A * exp(-((", x, "-", offset, "-0)/", stem, "C)^(-", stem, "B))")
    cp <- paste0("inv_logit((", x, "-", offset, ") * 5)")
    cpInt <- paste0(
      stem, "A * exp(-((", paste0(rev(prevAndCurrentChangePoints), collapse = "-"),
      "-0)/", stem, "C)^(-", stem, "B))"
    )
  }
  pars <- paste0(stem, c("A", "B", "C"))
  if (!fixed) {
    pars <- c(pars, paste0(chngptPrefix, "changePoint", position))
  }
  return(list(
    "form" = form,
    "cp" = cp,
    "cpInt" = cpInt,
    "params" = pars
  ))
}
|
| 1150 | ||
| 1151 | ||
| 1152 |
#* ****************************************
#* ***** `Gumbel Changepoint Phase` *****
#* ****************************************

#' Gumbel changepoint section function
#'
#' Builds the text of one Gumbel-shaped growth phase for a changepoint model,
#' along with the inverse-logit switch that activates the phase and the value of
#' the phase at its ending changepoint (used to stitch phases together).
#'
#' @param x X variable name
#' @param position Position in growth formula ("1" + "2" + "3"... etc)
#' @param dpar string or NULL, if string should be the name of the distributional parameter
#' @param priors a list of prior distributions (used for fixed vs estimated changepoints)
#'
#' @examples
#'
#' .gumbelChngptForm(x = "time", 1)
#' .gumbelChngptForm(x = "time", 2)
#' .gumbelChngptForm(x = "time", 3)
#'
#' @return a list with form, cp, and cpInt elements. "form" is the growth formula
#' for this phase of the model. "cp" is the inv_logit function defining when this
#' phase should happen. "cpInt" is the value at the end of this growth phase and is
#' used in starting the next growth phase from the right y value.
#'
#' @noRd
.gumbelChngptForm <- function(x, position = 1, dpar = NULL, priors) { # return f, cp, and cpInt
  # Both the parameter prefix and the changepoint prefix start as the dpar name
  # (NULL for the main growth model), so e.g. sigma sub-models get "sigma"-prefixed pars.
  prefix <- chngptPrefix <- dpar
  # A "fixedChangePoint<position>" entry in priors means this phase's changepoint is a
  # known constant rather than an estimated parameter.
  if (any(grepl(paste0("fixedChangePoint", position), names(priors)))) {
    # NOTE(review): assumes the fixed changepoint prior element is a scalar (or that
    # only its first value matters) — [1] silently drops any extra values. TODO confirm.
    changePointObj <- as.numeric(priors[[paste0(prefix, "fixedChangePoint", position)]])[1]
    fixed <- TRUE
    chngptPrefix <- NULL # never a prefix if the changepoint is a fixed value
  } else {
    fixed <- FALSE
  }
  if (position == 1) {
    # First phase: no earlier changepoints to subtract, so the formula uses x directly.
    if (!fixed) {
      changePointObj <- "changePoint1"
    }
    # Gumbel CDF-shaped growth: A * exp(-exp(-(x - B) / C))
    form <- paste0(
      prefix, "gumbel", position, "A * exp(-exp(-(", x, "-",
      prefix, "gumbel", position, "B)/", prefix, "gumbel", position, "C))"
    )
    # Smooth switch that is ~1 before the changepoint and ~0 after (slope factor of 5).
    cp <- paste0("inv_logit((", chngptPrefix, changePointObj, " - ", x, ") * 5)")
    # Value of this phase evaluated at the changepoint: the next phase starts from here.
    cpInt <- paste0(
      prefix, "gumbel", position, "A * exp(-exp(-(", chngptPrefix, changePointObj, "-",
      prefix, "gumbel", position, "B)/", prefix, "gumbel", position, "C))"
    )
  } else {
    # Later phases: time must be re-zeroed by subtracting all earlier changepoints.
    # NOTE(review): the trailing "*" in this pattern is a regex quantifier on the final
    # character (zero-or-more "t"), not a glob — it also matches "fixedChangePoin".
    # Presumably harmless given the prior naming convention, but TODO confirm intent.
    all_chngpts <- names(priors)[grepl("fixedChangePoint*|changePoint*", names(priors))]
    # ".*hangePoint" strips everything through "...hangePoint", leaving the phase number.
    prevChangePoints <- all_chngpts[which(as.numeric(sub(".*hangePoint", "", all_chngpts)) < position)]
    # Changepoints bounding this phase (previous and current) are used for cpInt below.
    prevAndCurrentChangePoints <- all_chngpts[which(
      as.numeric(sub(".*hangePoint", "", all_chngpts)) %in% c(position, position - 1)
    )]
    #* per location where "fixed" is in the prior name, replace the name with that number.
    prev_fixed_index <- which(grepl("fixed", prevChangePoints))
    if (length(prev_fixed_index) > 0) {
      # NOTE(review): as.numeric() on a sub-list requires every fixed changepoint prior
      # to be a length-1 numeric — multi-element priors would error here. TODO confirm.
      prevChangePoints[prev_fixed_index] <- as.numeric(priors[prevChangePoints[prev_fixed_index]])
    }
    pac_fixed_index <- which(grepl("fixed", prevAndCurrentChangePoints))
    if (length(pac_fixed_index) > 0) {
      prevAndCurrentChangePoints[pac_fixed_index] <- as.numeric(
        priors[prevAndCurrentChangePoints[pac_fixed_index]]
      )
    }
    # Same Gumbel shape as phase 1, but x is shifted by the sum of earlier changepoints.
    form <- paste0(
      prefix, "gumbel", position, "A * exp(-exp(-(", x, "-", paste0(prevChangePoints, collapse = "-"),
      "-", prefix, "gumbel", position, "B)/", prefix, "gumbel", position, "C))"
    )
    # Note the switch direction flips relative to phase 1: ~0 before, ~1 after the
    # accumulated earlier changepoints (so this phase turns ON once reached).
    cp <- paste0("inv_logit((", x, "-", paste0(prevChangePoints, collapse = "-"), ") * 5)")
    cpInt <- paste0(
      prefix, "gumbel", position, "A * exp(-exp(-(", paste0(rev(prevAndCurrentChangePoints),
        collapse = "-"
      ),
      "-", prefix, "gumbel", position, "B)/", prefix, "gumbel", position, "C))"
    )
  }
  # Parameters this phase introduces; an estimated changepoint adds one more.
  pars <- paste0(prefix, "gumbel", position, c("A", "B", "C"))
  if (!fixed) {
    pars <- c(pars, paste0(chngptPrefix, "changePoint", position))
  }
  return(list(
    "form" = form,
    "cp" = cp,
    "cpInt" = cpInt,
    "params" = pars
  ))
}
#' @description
#' Internal function for calculating the gamma distributed scale parameter of a pareto distribution
#' represented by multi value traits.
#' @param s1 A data.frame or matrix of multi value traits (rows = samples, columns = histogram
#' bins whose names end in the numeric bin midpoint).
#' @param priors Optional list with elements a, b (gamma hyperparameters) and known_location
#' (the pareto location); defaults are made from the data when NULL.
#' @param plot Logical, should a plotting data.frame be attached to the output?
#' @param support Numeric grid over which to evaluate the posterior density.
#' @param cred.int.level Credible interval level (e.g. 0.95).
#' @param calculatingSupport Logical, if TRUE (with NULL support) only the 0.0001/0.9999
#' posterior quantiles are returned, for use in building a support grid.
#' @examples
#' library(extraDistr)
#' s1 <- mvSim(
#'   dists = list(rpareto = list(a = 1, b = 1)),
#'   n_samples = 10,
#'   counts = 50,
#'   min_bin = 1,
#'   max_bin = 180,
#'   wide = TRUE
#' )[, -1]
#' out <- .conj_pareto_mv(s1, cred.int.level = 0.95)
#' lapply(out, head)
#'
#' @keywords internal
#' @noRd
.conj_pareto_mv <- function(s1 = NULL, priors = NULL,
                            plot = FALSE, support = NULL, cred.int.level = NULL,
                            calculatingSupport = FALSE) {
  out <- list()
  #* `N observations`
  n_obs <- nrow(s1)
  #* `Reorder columns if they are not in the numeric order`
  # Strip the alpha/underscore/dot prefix from column names to recover numeric bin values.
  histColsBin <- as.numeric(sub("[a-zA-Z_.]+", "", colnames(s1)))
  bins_order <- sort(histColsBin, index.return = TRUE)$ix
  s1 <- s1[, bins_order]
  #* `Min non-zero bin`
  # Smallest bin with any mass across rows; used as the default pareto location.
  # NOTE(review): this gsub pattern ("[a-zA-Z]_*") differs from the sub pattern above
  # ("[a-zA-Z_.]+") — equivalent for typical "name_123" columns, but TODO confirm for
  # column names containing dots before the number.
  obs_min <- min(unlist(lapply(seq_len(n_obs), function(i) {
    col <- colnames(s1)[which(s1[i, ] > 0)][1]
    as.numeric(gsub("[a-zA-Z]_*", "", col))
  })), na.rm = TRUE)
  #* `make default prior if none provided`
  if (is.null(priors)) {
    priors <- list(a = 0.5, b = 0.5, known_location = obs_min)
  }
  #* `Note, the product of the data is not an obvious quantity with MV trait data.`
  #* I talk about this some in my notes on 5/30/2024 but I think it might be a good chance
  #* to try the alternative mv trait method that I'd been considering.
  #* `Calculate Sufficient Statistics`
  #* This is abnormal because one of the sufficient statistics is the product of the data.
  #* That quantity does not translate well to the MV trait setting.
  #* `MLE Estimates of Pareto Parameters`
  #* Note this is being done per row of the MV data
  row_scales <- unlist(lapply(seq_len(n_obs), function(i) {
    d <- s1[i, ]
    #* `Turn s1 matrix into a vector`
    # Expand the histogram row into the raw sample it represents (bin value repeated count times).
    X1 <- rep(histColsBin[bins_order], as.numeric(round(colSums(d))))
    #* `Estimate parameters of pareto distribution of this row`
    # NOTE(review): the textbook pareto shape MLE is n / sum(log(x / location)); this uses
    # sum(d) / sum(log(X1)), i.e. it omits the n * log(location) term (equivalent only when
    # location == 1). TODO confirm this is intended for these data.
    scale_mle <- sum(d) / sum(log(X1))
    return(scale_mle)
  }))
  scale_estimate <- mean(row_scales)
  # Resample pseudo single-value draws from a pareto with the estimated shape; these
  # (random) draws feed the conjugate update below, so the posterior is stochastic for
  # identical input data — presumably deliberate per the notes above; TODO confirm.
  sv_draws <- extraDistr::rpareto(n_obs, scale_estimate, priors$known_location)
  #* `Update gamma prior with sufficient statistics`
  n <- length(sv_draws)
  m <- prod(sv_draws)
  a_prime <- priors$a + n
  b_prime <- 1 / (1 / priors$b + log(m) - n * log(priors$known_location))
  #* `Define support if it is missing`
  if (is.null(support) && calculatingSupport) {
    # Early exit: caller only wants extreme quantiles to build a shared support grid.
    quantiles <- qgamma(c(0.0001, 0.9999), a_prime, b_prime)
    return(quantiles)
  }
  #* `Make Posterior Draws`
  out$posteriorDraws <- rgamma(10000, a_prime, b_prime)
  #* `posterior`
  # Normalize the density over the support grid so it sums to 1 (a discrete pdf).
  dens1 <- dgamma(support, a_prime, b_prime)
  pdf1 <- dens1 / sum(dens1)
  out$pdf <- pdf1
  # Highest density estimate plus an equal-tailed credible interval.
  hde1 <- .gammaHDE(shape = a_prime, scale = 1 / b_prime)
  hdi1 <- qgamma(
    c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
    a_prime, b_prime
  )
  #* `Store summary`
  out$summary <- data.frame(HDE_1 = hde1, HDI_1_low = hdi1[1], HDI_1_high = hdi1[2])
  out$posterior <- list(
    "a" = a_prime,
    "b" = b_prime,
    "known_location" = priors$known_location
  )
  #* `save s1 data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support,
      "prob" = pdf1,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(out)
}
|
#' @description
#' Internal function for calculating the gamma distributed scale parameter of a pareto distribution
#' represented by single value traits. Uses the standard gamma conjugate update for the
#' pareto shape when the location is known.
#' @param s1 A vector of numerics drawn from a pareto distribution.
#' @examples
#' out <- .conj_pareto_sv(
#'   s1 = runif(10, 1, 1), cred.int.level = 0.95,
#'   plot = FALSE
#' )
#' lapply(out, head)
#' @keywords internal
#' @noRd
.conj_pareto_sv <- function(s1 = NULL, priors = NULL,
                            plot = FALSE, support = NULL, cred.int.level = NULL,
                            calculatingSupport = FALSE) {
  #* `make default prior if none provided`
  # Default location is the floor of the smallest observation so all data are supported.
  if (is.null(priors)) {
    priors <- list(a = 0.5, b = 0.5, known_location = floor(min(s1)))
  }
  #* `Update gamma prior with sufficient statistics`
  # Sufficient statistics for the pareto shape are n and the log-product of the data.
  n_obs <- length(s1)
  log_prod <- log(prod(s1))
  post_shape <- priors$a + n_obs
  post_rate <- 1 / (1 / priors$b + log_prod - n_obs * log(priors$known_location))
  #* `Define support if it is missing`
  # Early exit used by callers that only need extreme quantiles to build a support grid.
  if (is.null(support) && calculatingSupport) {
    return(qgamma(c(0.0001, 0.9999), post_shape, post_rate))
  }
  out <- list()
  #* `Make Posterior Draws`
  out$posteriorDraws <- rgamma(10000, post_shape, post_rate)
  #* `posterior`
  # Normalize the gamma density over the support grid into a discrete pdf.
  density_vals <- dgamma(support, post_shape, post_rate)
  pdf_vals <- density_vals / sum(density_vals)
  out$pdf <- pdf_vals
  # Highest density estimate plus an equal-tailed credible interval.
  hde <- .gammaHDE(shape = post_shape, scale = 1 / post_rate)
  tail_prob <- (1 - cred.int.level) / 2
  hdi <- qgamma(c(tail_prob, 1 - tail_prob), post_shape, post_rate)
  #* `Store summary`
  out$summary <- data.frame(HDE_1 = hde, HDI_1_low = hdi[1], HDI_1_high = hdi[2])
  out$posterior <- list(
    "a" = post_shape,
    "b" = post_rate,
    "known_location" = priors$known_location
  )
  #* `save s1 data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support,
      "prob" = pdf_vals,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(out)
}
#' Ease of use brms starter function for 6 growth model parameterizations
#'
#' @param model The name of a model as a character string.
#' Supported options are "logistic", "gompertz", "frechet", "gumbel", "weibull",
#' "monomolecular", "exponential", "linear", "logarithmic",
#' "power law", "double logistic", "double gompertz", and "gam".
#' See \code{\link{growthSim}} for examples of each type of growth curve.
#' @param form A formula describing the model. The left hand side should only be
#' the outcome variable (phenotype). The right hand side needs at least the x variable
#' (typically time). Grouping is also described in this formula using roughly lme4
#' style syntax, with formulas like \code{y~time|individual/group} to show that predictors
#' should vary by \code{group} and autocorrelation between \code{individual:group}
#' interactions should be modeled. If group has only one level or is not included then
#' it will be ignored in formulas for growth and variance (this may be the case if
#' you split data before fitting models to be able to run more smaller models each more quickly).
#' @param sigma A model for heteroskedasticity from the same list of options as the model argument.
#' @param df A dataframe to use. Must contain all the variables listed in the formula.
#' @param priors A named list of means for prior distributions.
#' Currently this function makes lognormal priors for all growth model parameters
#' and T_5(mu, 3) priors for changepoint parameters.
#' This is done because the values are strictly positive and the lognormal distribution
#' is easily interpreted. The changepoint priors are T distributions for symmetry, 5 DF
#' having been chosen for heavy but not unmanageable tails.
#' If this argument is not provided then priors are not
#' returned and a different set of priors will need to be made for the model using
#' \code{brms::set_prior}. This works similarly to the \code{params} argument
#' in \code{growthSim}. Names should correspond to parameter names from the
#' \code{model} argument. A numeric vector can also be used, but specifying
#' names is best practice for clarity. Additionally, due to a limitation in
#' \code{brms} currently lower bounds cannot be set for priors for specific groups.
#' If priors include multiple groups (\code{priors = list(A = c(10,15), ...)}) then
#' you will see warnings after the model is fit about not having specified a lower
#' bound explicitly. Those warnings can safely be ignored and will be addressed if
#' the necessary features are added to \code{brms}. For GAMs priors are not created by
#' this function but can still be provided as a \code{brmsprior} object.
#' See details for guidance.
#' @param int Logical, should an intercept term be included?
#' @param hierarchy Optional named list mapping growth-model parameter names to sub-model
#' types (e.g. "int_linear") when the formula includes a hierarchical predictor. If the
#' formula has a hierarchical predictor but this is NULL, intercept + linear sub-models
#' are assumed for all parameters (with a warning).
#' @keywords Bayesian, brms
#'
#' @importFrom stats as.formula rgamma
#'
#' @details
#'
#' Default informative priors are not provided,
#' but these can serve as starting points for each distribution.
#' You are encouraged to use \code{growthSim} to consider what kind
#' of trendlines result from changes to your prior and for interpretation of each parameter.
#' You should not look back and forth at your data trying to match your
#' observed growth exactly with a prior distribution,
#' rather this should be informed by an understanding of the plants you
#' are using and expectations based on previous research.
#' For the "double" models the parameter interpretation is the same
#' as for their non-double counterparts except that there are A and A2, etc.
#' It is strongly recommended to familiarize yourself with the double sigmoid
#' distributions using growthSim before attempting to model one. Additionally,
#' those distributions are intended for use with long delays in an experiment,
#' think stress recovery experiments, not for minor hiccups in plant growth.
#'
#' \itemize{
#'     \item \bold{Logistic}: \code{list('A' = 130, 'B' = 12, 'C' = 3)}
#'     \item \bold{Gompertz}: \code{list('A' = 130, 'B' = 12, 'C' = 1.25)}
#'     \item \bold{Double Logistic}: \code{list('A' = 130, 'B' = 12, 'C' = 3,
#'                                       'A2' = 200, 'B2' = 25, 'C2' = 1)}
#'     \item \bold{Double Gompertz}: \code{list('A' = 130, 'B' = 12, 'C' = 0.25,
#'                                       'A2' = 220, 'B2' = 30, 'C2' = 0.1)}
#'     \item \bold{Monomolecular}: \code{list('A' = 130, 'B' = 2)}
#'     \item \bold{Exponential}: \code{list('A' = 15, 'B' = 0.1)}
#'     \item \bold{Linear}: \code{list('A' = 1)}
#'     \item \bold{Power Law}: \code{list('A' = 13, 'B' = 2)}
#' }
#'
#'
#'
#' The \code{sigma} argument optionally specifies a sub model to account for heteroskedasticity.
#' Currently there are four supported sub models described below.
#'
#' \itemize{
#'     \item \bold{homo}: \code{sigma ~ 1}, fitting only a global or per group intercept to sigma.
#'     \item \bold{linear}: \code{sigma ~ time}, modeling sigma with a linear relationship to time
#'     and possibly with an interaction term per groups.
#'     \item \bold{spline}: \code{sigma ~ s(time)}, modeling sigma using a smoothing function through
#'     `mgcv::s`, possibly by group.
#'     \item \bold{gompertz}: \code{sigma ~ subA * exp(-subB * exp(-subC * x))},
#'     modeling sigma as a gompertz function of time, possibly by group. Note that you
#'     should specify priors for the parameters in this sub model by adding them into the \code{priors}
#'     argument, such as \code{list(..., subA = 20, subB = 15, subC = 0.25)}. If you do not specify
#'     priors then default (flat) priors will be used, which is liable to cause fitting problems
#'     and less accurate results. Looking at your data and making a semi-informed estimate of the
#'     total variance at the end of the experiment can help set a reasonable prior for subA, while
#'     subB and subC can generally be the same as B and C in a gompertz growth model of the same data.
#'     These priors will have fat tails so they are pretty forgiving.
#' }
#'
#'
#' @return A named list of elements to make it easier to fit common brms models.
#' \code{formula}: A \code{brms::bf} formula specifying the growth model, autocorrelation, variance
#' submodel, and models for each variable in the growth model.
#' \code{prior}: A brmsprior/data.frame object.
#' \code{initfun}: A function to randomly initialize chains using a random draw from a gamma
#' distribution (confines initial values to positive and makes correct number
#' of initial values for chains and groups). For "gam" models this initializes all chains at 0.
#' \code{df} The data input, possibly with dummy variables added if needed.
#' \code{family} The model family, currently this will always be "student".
#' \code{pcvrForm} The form argument unchanged. This is returned so that
#' it can be used later on in model visualization. Often it may be a good idea
#' to save the output of this function with the fit model, so having this can
#' be useful later on.
#' @examples
#'
#' simdf <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' ss <- .brmSS(
#'   model = "logistic", form = y ~ time | id / group,
#'   sigma = "spline", df = simdf, priors = list("A" = 130, "B" = 12, "C" = 3)
#' )
#' lapply(ss, class)
#' ss$initfun()
#'
#' @keywords internal
#' @noRd

.brmSS <- function(model, form, sigma = NULL, df, priors = NULL, int = FALSE, hierarchy = NULL) {
  out <- list()
  # Master list of recognized model/sub-model keywords used for partial matching below.
  models <- c(
    "int", "logistic", "gompertz", "monomolecular", "exponential", "linear", "power law", "logarithmic",
    "double logistic", "double gompertz", "gam", "spline", "homo", "frechet", "gumbel", "weibull",
    "not_estimated", "bragg", "lorentz", "beta"
  )
  #* ***** `Make bayesian formula` *****
  #* `parse form argument`
  parsed_form <- .parsePcvrForm(form, df)
  y <- parsed_form$y
  # An outcome written as "y[lo, hi]" requests truncation; rewrite it into brms
  # trunc(lb=, ub=) syntax.
  if (grepl("\\[", y)) {
    lims <- gsub(".*\\[|\\]", "", y)
    lims <- as.numeric(strsplit(lims, ",")[[1]])
    lower <- lims[1]
    upper <- lims[2]
    y <- trimws(gsub("\\[.*", paste0("|trunc(lb=", lower, ", ub=", upper, ")"), y))
  }
  x <- parsed_form$x
  individual <- parsed_form$individual
  group <- parsed_form$group
  USEGROUP <- parsed_form$USEG
  USEINDIVIDUAL <- parsed_form$USEID
  df <- parsed_form$data
  hierarchical_predictor <- parsed_form$hierarchical_predictor

  #* `convert group to character to avoid unexpected factor stuff`
  df[[group]] <- as.character(df[[group]])
  #* `Make autocorrelation formula`
  # ARMA(1,1) autocorrelation within each individual:group series when individuals exist.
  if (USEINDIVIDUAL) {
    corForm <- as.formula(paste0("~arma(~", x, "|", individual, ":", group, ",1,1)"))
  } else {
    corForm <- NULL
  }
  #* `get family specific elements`
  family_res <- .brmFamilyHelper(model)
  model <- family_res$rhs
  family <- family_res$family
  dpars <- family_res$dpars
  #* `Reformat sigma if it is a formula`
  sigma <- .sigmaHelper(sigma, dpars, family, models)
  #* `match args`
  matchGrowthModelRes <- .matchGrowthModel(model, models)
  matched_model <- matchGrowthModelRes[["model"]]
  decay <- matchGrowthModelRes[["decay"]]
  #* `Make growth formula`
  # Minimum number of unique x values in any group caps the spline knot count.
  nTimes <- min(unlist(lapply(split(df, df[[group]]), function(d) {
    length(unique(d[[x]]))
  }))) # for spline knots
  splineHelperForm <- NULL
  # A "+" in the model string marks a changepoint model (e.g. "linear + logistic").
  if (grepl("\\+", model)) {
    chngptHelperList <- .brmsChangePointHelper(model, x, y, group,
      dpar = FALSE, nTimes = nTimes,
      useGroup = USEGROUP, priors = priors, int = int
    )
    growthForm <- chngptHelperList$growthForm
    pars <- chngptHelperList$pars
    splineHelperForm <- chngptHelperList$splineHelperForm
  } else {
    # "homo" and "spline" are aliases resolved to "int" and "gam" form builders.
    matched_model <- gsub("homo", "int", matched_model)
    matched_model <- gsub("spline", "gam", matched_model)
    # Dispatch to the per-model form builder by constructed name, e.g. ".brms_form_logistic".
    stringBrmsFormFun <- paste0(".brms_form_", gsub(" ", "", matched_model))
    brmsFormFun <- match.fun(stringBrmsFormFun)
    formRes <- brmsFormFun(x, y, group,
      dpar = FALSE, nTimes = nTimes,
      useGroup = USEGROUP, prior = priors, int = int
    )
    if (decay) {
      formRes <- .brms_form_decay(formRes, int)
    }
    pars <- formRes$pars
    growthForm <- formRes$form
  }

  #* `Make distributional parameter formulas`
  #* Note there is always a sigma after .sigmaHelper (it just might be homoskedastic)
  dpar_res <- lapply(seq_along(sigma), function(i) {
    dpar <- names(sigma)[[i]]
    model <- sigma[[i]]
    intModelRes <- .intModelHelper(model)
    model <- intModelRes$model
    sigmaInt <- intModelRes$int
    .brmDparHelper(dpar, model, x, group, nTimes, USEGROUP, priors, sigmaInt)
  })
  names(dpar_res) <- names(sigma)
  dparForm <- unlist(lapply(dpar_res, function(res) {
    res$dparForm
  }))
  dparSplineHelperForm <- unlist(lapply(dpar_res, function(res) {
    res$dparSplineHelperForm
  }))
  dpar_pars <- unlist(lapply(dpar_res, function(res) {
    res$dpar_pars
  }))
  names(dpar_pars) <- NULL
  pars <- append(pars, dpar_pars)
  # Spline terms are estimated via smooths, not named coefficients, so they are dropped here.
  pars <- pars[!grepl("spline", pars)]

  #* `Make hierarchical parameter model formulas`
  if (!is.null(hierarchical_predictor)) {
    if (is.null(hierarchy)) {
      warning(paste0(
        "hierarchy argument not provided, assuming linear models with intercepts ",
        "for all ", matched_model, " model parameters (",
        paste(pars, collapse = ", "),
        ")."
      ))
      hierarchy <- lapply(pars, function(p) {
        "int_linear"
      })
      names(hierarchy) <- pars
    }

    hrc_res <- lapply(names(hierarchy), function(pname) {
      hrc_model <- hierarchy[[pname]]
      intModelRes <- .intModelHelper(hrc_model)
      hrc_model <- intModelRes$model
      hrc_int <- intModelRes$int
      .brmDparHelper(
        dpar = pname, model = hrc_model, x = hierarchical_predictor,
        group, nTimes, USEGROUP, priors, int = hrc_int
      )
      #* here passing `pname` to the `dpar` argument of .brmDparHelper will make
      #* .brmDparHelper add that name as a prefix on all of the existing model parameters.
      #* Since all the parameter names are unique coming into this they will be unique coming
      #* out of this as well.
      #* I'm also passing hierarchical_predictor to the x argument
    })
    names(hrc_res) <- names(hierarchy)
    hrcForm <- unlist(lapply(hrc_res, function(res) {
      res$dparForm
    }))
    hrcSplineHelperForm <- unlist(lapply(hrc_res, function(res) {
      res$dparSplineHelperForm
    }))
    hrc_pars <- unlist(lapply(hrc_res, function(res) {
      res$dpar_pars
    }))
    names(hrc_pars) <- NULL
    pars <- append(pars, hrc_pars)
    pars <- pars[-which(pars %in% names(hierarchy))] #* remove pars that are now estimated by other
    #* sub models from the later steps.
  } else {
    hrcForm <- NULL
    hrcSplineHelperForm <- NULL
  }
  # Repeated defensively: hierarchical sub-models may also have introduced spline terms.
  pars <- pars[!grepl("spline", pars)]

  #* `Make parameter grouping formulae`
  # Each non-linear parameter gets either a per-group (no-intercept) or global model.
  if (!is.null(pars)) {
    if (USEGROUP) {
      parForm <- as.formula(paste0(paste(pars, collapse = "+"), "~0+", group))
    } else {
      parForm <- as.formula(paste0(paste(pars, collapse = "+"), "~1"))
    }
  } else {
    parForm <- NULL
  }

  #* `Combine formulas into brms.formula object`
  # Only non-linear models (those with named parameters) set nl = TRUE.
  if (is.null(parForm)) {
    NL <- FALSE
  } else {
    NL <- TRUE
  }
  bf_args <- list(
    "formula" = growthForm, dparForm, hrcForm, parForm,
    dparSplineHelperForm, hrcSplineHelperForm, splineHelperForm,
    "autocor" = corForm, "nl" = NL
  )
  # Drop NULL components before calling brms::bf, which would not accept them positionally.
  bf_args <- bf_args[!unlist(lapply(bf_args, is.null))]
  bayesForm <- do.call(brms::bf, args = bf_args)

  out[["formula"]] <- bayesForm
  #* ***** `Make priors` *****
  out[["prior"]] <- .makePriors(priors, pars, df, group, USEGROUP, sigma, family, bayesForm)
  #* ***** `Make initializer function` *****
  if (!is.null(pars)) {
    initFun <- function(pars = "?", nPerChain = 1) {
      init <- lapply(pars, function(i) array(rgamma(nPerChain, 1)))
      names(init) <- paste0("b_", pars)
      init
    }
    # Bake the actual parameter names and per-chain count into the closure's defaults so
    # the returned wrapper can be called with no arguments by the sampler.
    formals(initFun)$pars <- pars
    formals(initFun)$nPerChain <- length(unique(df[[group]]))
    wrapper <- function() {
      initFun()
    }
  } else {
    # For models with no named parameters (e.g. GAMs) chains initialize at 0.
    wrapper <- 0
  }

  #* ***** `Raise Message for complex models` *****

  if (length(pars) * length(unique(df[[group]])) > 50) {
    message(paste0(
      "This model will estimate >50 parameters (excluding any smooth terms). \n\n",
      "If the MCMC is very slow then consider fitting separate models and using `combineDraws()` ",
      "to make a data.frame for hypothesis testing."
    ))
  }

  #* ***** `Return Components` *****
  out[["initfun"]] <- wrapper
  out[["df"]] <- df
  out[["family"]] <- family
  out[["pcvrForm"]] <- form
  return(out)
}
#' Function to visualize brms survival models specified using growthSS.
#'
#' Models fit using \link{growthSS} inputs by \link{fitGrowth}
#' (and similar models made through other means)
#' can be visualized easily using this function. This will generally be called by \code{growthPlot}.
#'
#' @param fit A brmsfit object, similar to those fit with \code{\link{growthSS}} outputs.
#' @param form A formula similar to that in \code{growthSS} inputs specifying the outcome,
#' predictor, and grouping structure of the data as \code{outcome ~ predictor|individual/group}.
#' @param df An optional dataframe to use in plotting observed growth curves on top of the model.
#' @param groups An optional set of groups to keep in the plot.
#' Defaults to NULL in which case all groups in the model are plotted.
#' @param timeRange An optional range of times to use. This can be used to view predictions for
#' future data if the available data has not reached some point (such as asymptotic size),
#' although prediction using splines outside of the observed range is not necessarily reliable.
#' @param facetGroups logical, should groups be separated in facets? Defaults to TRUE.
#' @import ggplot2
#' @import viridis
#' @importFrom stats as.formula
#' @examples
#' \donttest{
#' set.seed(123)
#' df <- growthSim("exponential",
#'   n = 20, t = 50,
#'   params = list("A" = c(1, 1), "B" = c(0.15, 0.1))
#' )
#' ss1 <- growthSS(
#'   model = "survival weibull", form = y > 100 ~ time | id / group,
#'   df = df, start = c(0, 5)
#' )
#' fit1 <- fitGrowth(ss1, iter = 600, cores = 2, chains = 2, backend = "cmdstanr")
#' brmSurvPlot(fit1, form = ss1$pcvrForm, df = ss1$df)
#'
#' # note that using the cumulative hazard to calculate survival is likely to underestimate
#' # survival in these plots if events do not start immediately.
#' ss2 <- growthSS(
#'   model = "survival binomial", form = y > 100 ~ time | id / group,
#'   df = df, start = c(-4, 3)
#' )
#' fit2 <- fitGrowth(ss2, iter = 600, cores = 2, chains = 2, backend = "cmdstanr")
#' brmSurvPlot(fit2, form = ss2$pcvrForm, df = ss2$df)
#' }
#'
#' @return Returns a ggplot showing a brms model's credible
#' intervals and optionally the individual growth lines.
#'
#' @export

brmSurvPlot <- function(fit, form, df = NULL, groups = NULL, timeRange = NULL, facetGroups = TRUE) {
  # Dispatch on the model family; only weibull and binomial survival models are supported.
  family <- as.character(fit$family)[1]

  if (family == "weibull") {
    p <- .weibullBrmSurvPlot(
      fit = fit, form = form, df = df, groups = groups,
      timeRange = timeRange, facetGroups = facetGroups
    )
  } else if (family == "binomial") {
    p <- .binomialBrmSurvPlot(
      fit = fit, form = form, df = df, groups = groups,
      timeRange = timeRange, facetGroups = facetGroups
    )
  } else {
    # Previously an unsupported family fell through and return(p) failed with the
    # opaque "object 'p' not found"; raise an informative error instead.
    stop(paste0(
      "Model family '", family, "' is not supported by brmSurvPlot, ",
      "only 'weibull' and 'binomial' survival models can be plotted."
    ), call. = FALSE)
  }
  return(p)
}
|
| 65 | ||
| 66 |
#' Internal plotting function for binomial survival models
#' brms coefficients here are on the logit scale; inv_logit of the b_ terms gives per-interval hazards
|
| 68 |
#' @keywords internal |
|
| 69 |
#' @noRd |
|
| 70 | ||
| 71 |
.binomialBrmSurvPlot <- function(fit, form, df = NULL, groups = NULL, |
|
| 72 |
timeRange = NULL, facetGroups = TRUE) {
|
|
| 73 |
#* `pull model data` |
|
| 74 | ! |
fitData <- fit$data |
| 75 |
#* `general pcvr formula parsing` |
|
| 76 | ! |
parsed_form <- .parsePcvrForm(form, df) |
| 77 | ! |
x <- parsed_form$x |
| 78 | ! |
group <- parsed_form$group |
| 79 | ! |
df <- parsed_form$data |
| 80 |
#* `set groups to use` |
|
| 81 | ! |
if (is.null(groups)) {
|
| 82 | ! |
groups <- unique(fitData[[group]]) |
| 83 |
} |
|
| 84 |
#* `pull draws and convert to survival` |
|
| 85 | ! |
draws <- as.matrix(fit) |
| 86 | ! |
s_hat <- brms::inv_logit_scaled(draws[, grepl("^b_", colnames(draws))])
|
| 87 | ! |
colnames(s_hat) <- gsub("b_", "haz_", colnames(s_hat))
|
| 88 | ! |
s_hat <- 1 - s_hat # take complement of hazard |
| 89 | ! |
s_hat <- as.data.frame( |
| 90 | ! |
do.call(rbind, lapply(groups, function(grp) {
|
| 91 | ! |
s_hat_grp <- s_hat[, grepl(paste0(group, grp, "$"), colnames(s_hat))] |
| 92 | ! |
x <- do.call(cbind, lapply(seq_len(ncol(s_hat_grp)), function(j) {
|
| 93 | ! |
do.call(rbind, lapply(seq_len(nrow(s_hat_grp)), function(i) {
|
| 94 | ! |
cumprod(s_hat_grp[i, 1:j])[j] |
| 95 |
})) |
|
| 96 |
})) |
|
| 97 | ! |
colnames(x) <- gsub("haz_", "surv_", colnames(s_hat_grp))
|
| 98 | ! |
colnames(x) <- gsub(paste0(":", group, grp), "", colnames(x))
|
| 99 | ! |
x <- as.data.frame(x) |
| 100 | ! |
x[[group]] <- grp |
| 101 | ! |
x |
| 102 |
})) |
|
| 103 |
) |
|
| 104 | ||
| 105 |
#* `define probabilities and take quantiles` |
|
| 106 | ! |
probs <- seq(from = 99, to = 1, by = -2) / 100 |
| 107 | ! |
quantiles <- as.data.frame( |
| 108 | ! |
do.call(rbind, lapply(groups, function(grp) {
|
| 109 | ! |
s_hat_grp <- s_hat[s_hat[[group]] == grp, ] |
| 110 | ! |
grp_quantile <- as.data.frame(do.call(rbind, lapply(1:(ncol(s_hat_grp) - 1), function(i) {
|
| 111 | ! |
quantile(s_hat_grp[, i], probs) |
| 112 |
}))) |
|
| 113 | ! |
colnames(grp_quantile) <- paste0("Q", seq(99, 1, -2))
|
| 114 | ! |
nms <- colnames(s_hat_grp)[grepl("surv_", colnames(s_hat_grp))]
|
| 115 | ! |
grp_quantile[[x]] <- as.numeric(gsub(paste0("surv_", x), "", nms))
|
| 116 | ! |
grp_quantile[[group]] <- grp |
| 117 | ! |
grp_quantile |
| 118 |
})) |
|
| 119 |
) |
|
| 120 | ! |
quantiles <- quantiles[quantiles[[group]] %in% groups, ] |
| 121 |
#* `Decide faceting` |
|
| 122 | ! |
facetLayer <- NULL |
| 123 | ! |
if (facetGroups && length(unique(fitData[[group]])) > 1) {
|
| 124 | ! |
facetLayer <- ggplot2::facet_wrap(as.formula(paste0("~", group)))
|
| 125 |
} |
|
| 126 |
#* `lengthen quantiles` |
|
| 127 | ! |
max_prime <- 0.99 |
| 128 | ! |
min_prime <- 0.01 |
| 129 | ! |
max_obs <- 49 |
| 130 | ! |
min_obs <- 1 |
| 131 | ! |
c1 <- (max_prime - min_prime) / (max_obs - min_obs) |
| 132 | ||
| 133 | ! |
longPreds <- do.call(rbind, lapply(seq_len(nrow(quantiles)), function(r) {
|
| 134 | ! |
sub <- quantiles[r, ] |
| 135 | ! |
do.call(rbind, lapply(seq(1, 49, 2), function(i) {
|
| 136 | ! |
min <- paste0("Q", i)
|
| 137 | ! |
max <- paste0("Q", 100 - i)
|
| 138 | ! |
iter <- sub[, c(x, group)] |
| 139 | ! |
iter$q <- round(1 - (c1 * (i - max_obs) + max_prime), 2) |
| 140 | ! |
iter$min <- sub[[min]] |
| 141 | ! |
iter$max <- sub[[max]] |
| 142 | ! |
iter |
| 143 |
})) |
|
| 144 |
})) |
|
| 145 | ||
| 146 |
#* `Initialize Plot` |
|
| 147 | ! |
p <- ggplot2::ggplot(longPreds, ggplot2::aes(x = .data[[x]])) + |
| 148 | ! |
facetLayer + |
| 149 | ! |
ggplot2::labs(x = x, y = "Survival") + |
| 150 | ! |
ggplot2::scale_y_continuous(labels = scales::label_percent()) + |
| 151 | ! |
pcv_theme() |
| 152 |
#* `Add Ribbons` |
|
| 153 | ! |
p <- p + |
| 154 | ! |
lapply(unique(longPreds$q), function(q) {
|
| 155 | ! |
ggplot2::geom_ribbon( |
| 156 | ! |
data = longPreds[longPreds$q == q, ], |
| 157 | ! |
ggplot2::aes( |
| 158 | ! |
ymin = min, |
| 159 | ! |
ymax = max, |
| 160 | ! |
group = .data[[group]], |
| 161 | ! |
fill = q |
| 162 | ! |
), alpha = 0.5 |
| 163 |
) |
|
| 164 |
}) + |
|
| 165 | ! |
viridis::scale_fill_viridis(direction = -1, option = "plasma") + |
| 166 | ! |
ggplot2::labs(fill = "Credible\nInterval") |
| 167 | ||
| 168 |
#* `Add KM Trend` |
|
| 169 | ! |
if (!is.null(df)) {
|
| 170 | ! |
df$pct_surv <- (1 - df$pct_event) |
| 171 | ! |
df[[x]] <- as.numeric(df[[x]]) |
| 172 | ! |
p <- p + ggplot2::geom_line(data = df, ggplot2::aes( |
| 173 | ! |
x = .data[[x]], |
| 174 | ! |
y = .data[["pct_surv"]], |
| 175 | ! |
group = .data[[group]], |
| 176 | ! |
linetype = .data[[group]] |
| 177 | ! |
), color = "black", show.legend = FALSE) |
| 178 |
} |
|
| 179 | ! |
return(p) |
| 180 |
} |
|
| 181 | ||
| 182 | ||
| 183 |
#' Internal Plotting function for weibull survival times
#' brms predicts mu where: scale = exp(mu) / (gamma(1 + 1 / shape))
#' @keywords internal
#' @noRd
.weibullBrmSurvPlot <- function(fit, form, df = NULL, groups = NULL,
                                timeRange = NULL, facetGroups = TRUE) {
  #* `Transform draws`
  # Convert each posterior mu draw into a weibull scale parameter using the
  # brms parameterization: scale = exp(mu) / gamma(1 + 1/shape).
  fitData <- fit$data
  fdf <- as.data.frame(fit)
  mu_cols <- colnames(fdf)[grepl("b_", colnames(fdf))]
  scales <- do.call(cbind, lapply(mu_cols, function(col) {
    exp(fdf[[col]]) / (gamma(1 + (1 / fdf$shape)))
  }))
  colnames(scales) <- paste0("scale_", mu_cols)
  fdf <- cbind(fdf, scales)
  #* `general pcvr formula parsing`
  # x = time variable name, group = grouping variable name, df = cleaned data
  parsed_form <- .parsePcvrForm(form, df)
  x <- parsed_form$x
  group <- parsed_form$group
  df <- parsed_form$data
  #* `further survival formula steps`

  #* `Define Time Range`
  # 100 evenly spaced times from 0 to the observed maximum rounded to tens
  if (is.null(timeRange)) {
    timeRange <- seq(0, round(max(fitData[[x]]), -1), length.out = 100)
  }
  #* `set groups to use`
  if (is.null(groups)) {
    groups <- unique(fitData[[group]])
  }
  #* `Make Survival Quantiles`
  # 50 quantile levels, 0.99 down to 0.01, used to build nested credible bands.
  probs <- seq(from = 99, to = 1, by = -2) / 100
  quantiles <- do.call(rbind, lapply(groups, function(grp) {
    # assumes scale columns are named "scale_b_<group><level>" — TODO confirm
    # for model formulas with more than a group effect.
    test <- fdf[, c(paste0("scale_b_", group, grp), "shape")]
    colnames(test) <- c("scale", "shape")
    metrics <- do.call(rbind, lapply(probs, function(i) {
      shape <- quantile(test[, "shape"], probs = i)
      scale <- quantile(test[, "scale"], probs = i)
      do.call(rbind, lapply(timeRange, function(t) {
        # standard weibull quantities: hazard h, CDF, PDF at time t
        t_row <- data.frame(
          probs = i,
          h = shape / scale * (t / scale)^(shape - 1),
          cdf = 1 - exp(1)^-(t / scale)^shape,
          pdf = shape / scale * (t / scale)^(shape - 1) * exp(1)^-(t / scale)^shape
        )
        t_row[[x]] <- t
        t_row$St <- 1 - t_row$cdf # survival function
        t_row$c <- -log(t_row$St) # cumulative hazard
        t_row
      }))
    }))
    metrics$probs2 <- round(metrics$probs * 100, 2)
    metrics <- data.table::as.data.table(metrics)
    # NOTE(review): the dcast LHS is literally `time`, but the time column was
    # created as t_row[[x]]; this appears to assume x == "time" — confirm with
    # a model whose time variable has a different name.
    wide_metrics <- as.data.frame(data.table::dcast(metrics, time ~ paste0("Q", probs2),
      value.var = "St", fun.aggregate = mean
    ))
    wide_metrics[[group]] <- grp
    wide_metrics
  }))
  #* `Decide faceting`
  facetLayer <- NULL
  if (facetGroups && length(unique(fitData[[group]])) > 1) {
    facetLayer <- ggplot2::facet_wrap(as.formula(paste0("~", group)))
  }
  #* `lengthen quantiles`
  # Linear map from quantile index (1..49) to credible-interval width so that
  # Q1/Q99 become the 0.98 interval and Q49/Q51 the 0.02 interval.
  max_prime <- 0.99
  min_prime <- 0.01
  max_obs <- 49
  min_obs <- 1
  c1 <- (max_prime - min_prime) / (max_obs - min_obs)
  longPreds <- do.call(rbind, lapply(seq_len(nrow(quantiles)), function(r) {
    sub <- quantiles[r, ]
    do.call(rbind, lapply(seq(1, 49, 2), function(i) {
      # `min`/`max` here are column-name strings (they shadow the builtins)
      min <- paste0("Q", i)
      max <- paste0("Q", 100 - i)
      iter <- sub[, c("time", group)]
      iter$q <- round(1 - (c1 * (i - max_obs) + max_prime), 2)
      iter$min <- sub[[min]]
      iter$max <- sub[[max]]
      iter
    }))
  }))
  #* `Initialize Plot`
  p <- ggplot2::ggplot(longPreds, ggplot2::aes(x = .data[["time"]])) +
    facetLayer +
    ggplot2::labs(x = x, y = "Survival") +
    ggplot2::scale_y_continuous(labels = scales::label_percent()) +
    pcv_theme()
  #* `Add Ribbons`
  # One ribbon per credible level; `min`/`max` in aes() are data-masked
  # references to the longPreds columns created above.
  p <- p +
    lapply(unique(longPreds$q), function(q) {
      ggplot2::geom_ribbon(
        data = longPreds[longPreds$q == q, ],
        ggplot2::aes(
          ymin = min,
          ymax = max,
          group = .data[[group]],
          fill = q
        ), alpha = 0.5
      )
    }) +
    viridis::scale_fill_viridis(direction = -1, option = "plasma") +
    ggplot2::labs(fill = "Credible\nInterval")
  #* `Add KM Trend`
  # Overlay an empirical survival curve computed from the supplied data;
  # assumes df has an "event" indicator column — TODO confirm upstream.
  if (!is.null(df)) {
    km_df <- do.call(rbind, lapply(groups, function(grp) {
      sub <- df[df[[group]] == grp, ]
      do.call(rbind, lapply(timeRange, function(ti) {
        # the trailing 0 keeps sum() well-defined when no rows match
        sum_events <- sum(c(sub[as.numeric(sub[[x]]) <= ti, "event"], 0))
        n_at_risk <- nrow(sub) - sum_events
        surv_pct <- n_at_risk / nrow(sub)
        data.frame(
          group = grp, time = ti, events = sum_events,
          at_risk = n_at_risk, surv_pct = surv_pct
        )
      }))
    }))
    p <- p + ggplot2::geom_line(data = km_df, ggplot2::aes(
      x = .data[["time"]],
      y = .data[["surv_pct"]],
      group = .data[["group"]],
      linetype = .data[["group"]]
    ), color = "black", show.legend = FALSE)
  }
  return(p)
}
| 1 |
#' @description |
|
| 2 |
#' Internal function for testing conjugate methods for MV and SV traits, useful for testing only. |
|
| 3 |
#' @param method a conjugate method with mv and sv options |
|
| 4 |
#' @param prior A prior for that conjugate method |
|
| 5 |
#' @param generating a list similar to those for mvSim that will generate data. See example. |
|
| 6 |
#' @examples |
|
| 7 |
#' method = "vonmises" |
|
| 8 |
#' prior = list(mu = 0, kappa = 1, known_kappa = 1, boundary = c(0, 180), n = 1) |
|
| 9 |
#' generating = list( |
|
| 10 |
#' s1 = list(f = "rnorm", n = 20, mean = 20, sd = 5), |
|
| 11 |
#' s2 = list(f = "rnorm", n = 20, mean = 170, sd = 5) |
|
| 12 |
#' ) |
|
| 13 |
#' .conjugate.mv.sv.testing(method, prior, generating) |
|
| 14 |
#' |
|
| 15 |
#' @keywords internal |
|
| 16 |
#' @noRd |
|
| 17 | ||
| 18 |
.conjugate.mv.sv.testing <- function(method, prior, generating) {
  gen1 <- generating[[1]]
  gen2 <- generating[[2]]
  # Single-value samples first, then multi-value simulations, preserving the
  # RNG draw order: sample 1, sample 2, mv sample 1, mv sample 2.
  sv1 <- do.call(gen1[[1]], gen1[-1])
  sv2 <- do.call(gen2[[1]], gen2[-1])
  mv1 <- mvSim(stats::setNames(list(gen1[-1]), gen1[[1]]), n_samples = gen1$n)[, -1]
  mv2 <- mvSim(stats::setNames(list(gen2[-1]), gen2[[1]]), n_samples = gen2$n)[, -1]
  # Run the conjugate comparison on both representations of the same data.
  res_sv <- conjugate(sv1, sv2, method = method, priors = prior, plot = FALSE, cred.int.level = 0.95)
  res_mv <- conjugate(mv1, mv2, method = method, priors = prior, plot = FALSE, cred.int.level = 0.95)
  # Tag every posterior and summary with the data type it came from.
  for (i in 1:2) {
    res_sv$posterior[[i]]$datatype <- "sv"
    res_mv$posterior[[i]]$datatype <- "mv"
  }
  res_sv$summary$datatype <- "sv"
  res_mv$summary$datatype <- "mv"
  return(list(
    "posteriors" = do.call(rbind, c(res_sv$posterior, res_mv$posterior)),
    "summaries" = rbind(res_sv$summary, res_mv$summary),
    "data_used" = list(sv_s1 = sv1, sv_s2 = sv2, mv_s1 = mv1, mv_s2 = mv2)
  ))
}
| 1 |
#' Function to visualize \code{flexsurv::flexsurvreg} models fit by \code{fitGrowth}.
|
|
| 2 |
#' |
|
| 3 |
#' Models fit using \link{growthSS} inputs by \link{fitGrowth}
|
|
| 4 |
#' (and similar models made through other means) can be visualized easily using this function. |
|
| 5 |
#' This will generally be called by \code{growthPlot}.
|
|
| 6 |
#' |
|
| 7 |
#' @param fit A model fit returned by \code{fitGrowth} with type="flexsurv".
|
|
| 8 |
#' @param form A formula similar to that in \code{growthSS} inputs
|
|
| 9 |
#' (or the \code{pcvrForm} part of the output) specifying the outcome,
|
|
| 10 |
#' predictor, and grouping structure of the data as \code{outcome ~ predictor|individual/group}.
|
|
| 11 |
#' If the individual and group are specified then the observed growth lines are plotted. |
|
| 12 |
#' @param groups An optional set of groups to keep in the plot. |
|
| 13 |
#' Defaults to NULL in which case all groups in the model are plotted. |
|
| 14 |
#' @param df A dataframe to use in plotting observed growth curves on top of the model. |
|
| 15 |
#' This must be supplied for nls models. |
|
| 16 |
#' @param timeRange Ignored, included for compatibility with other plotting functions. |
|
| 17 |
#' @param facetGroups logical, should groups be separated in facets? Defaults to TRUE. |
|
| 18 |
#' @param groupFill logical, should groups have different colors? Defaults to FALSE. If TRUE then |
|
| 19 |
#' viridis colormaps are used in the order of virMaps |
|
| 20 |
#' @param virMaps order of viridis maps to use. Will be recycled to necessary length. |
|
| 21 |
#' Defaults to "plasma", but will generally be informed by growthPlot's default. |
|
| 22 |
#' @keywords survival |
|
| 23 |
#' @importFrom methods is |
|
| 24 |
#' @import ggplot2 |
|
| 25 |
#' @importFrom stats predict |
|
| 26 |
#' @examples |
|
| 27 |
#' |
|
| 28 |
#' df <- growthSim("logistic",
|
|
| 29 |
#' n = 20, t = 25, |
|
| 30 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 31 |
#' ) |
|
| 32 |
#' ss <- growthSS( |
|
| 33 |
#' model = "survival weibull", form = y > 100 ~ time | id / group, |
|
| 34 |
#' df = df, type = "flexsurv" |
|
| 35 |
#' ) |
|
| 36 |
#' fit <- fitGrowth(ss) |
|
| 37 |
#' flexsurvregPlot(fit, form = ss$pcvrForm, df = ss$df, groups = "a") |
|
| 38 |
#' flexsurvregPlot(fit, |
|
| 39 |
#' form = ss$pcvrForm, df = ss$df, |
|
| 40 |
#' facetGroups = FALSE, groupFill = TRUE |
|
| 41 |
#' ) |
|
| 42 |
#' |
|
| 43 |
#' @return Returns a ggplot showing an survival model's survival function. |
|
| 44 |
#' |
|
| 45 |
#' @export |
|
| 46 | ||
| 47 | ||
| 48 |
flexsurvregPlot <- function(fit, form, groups = NULL, df = NULL, timeRange = NULL, facetGroups = TRUE,
                            groupFill = FALSE, virMaps = c("plasma")) {
  #* `parse formula`
  # x = time variable name, group = grouping variable name, df = cleaned data
  parsed_form <- .parsePcvrForm(form, df)
  x <- parsed_form$x
  group <- parsed_form$group
  df <- parsed_form$data
  #* `filter by groups if groups != NULL`
  if (!is.null(groups)) {
    df <- df[df[[group]] %in% groups, ]
  } else {
    groups <- unique(df[[group]])
  }
  #* `generate predictions`
  # quantile-type predictions return the estimated time at each survival
  # percentile, with standard errors.
  pct <- seq(0.01, 0.99, 0.01)
  preds <- predict(fit,
    newdata = data.frame("group" = groups),
    type = "quantile", p = pct, se.fit = TRUE
  )
  preds <- do.call(rbind, lapply(seq_along(preds[[1]]), function(i) {
    iter <- as.data.frame(preds[[1]][[i]])
    colnames(iter) <- c("pct", "est", "se")
    iter[[group]] <- groups[i]
    iter
  }))
  preds$surv <- 1 - preds$pct
  #* `facetGroups`
  facet_layer <- NULL
  if (facetGroups) {
    facet_layer <- ggplot2::facet_wrap(stats::as.formula(paste0("~", group)))
  }
  #* `groupFill`
  # Both branches previously duplicated the palette-building code; the only
  # difference is which viridis maps get used, so that choice is factored out.
  mapsToUse <- if (groupFill) virMaps else "plasma"
  virVals <- lapply(rep(mapsToUse, length.out = length(unique(df[[group]]))), function(pal) {
    viridis::viridis(3, begin = 0.1, option = pal)
  })
  names(virVals) <- groups
  color_scale <- ggplot2::scale_color_manual(values = unlist(lapply(virVals, function(pal) pal[3])))
  #* `Make ggplot`
  # Ribbons show +/- 2 SE (lighter) and +/- 1 SE (darker) around the line.
  p <- ggplot2::ggplot(preds, ggplot2::aes(
    x = .data[["est"]],
    y = .data[["surv"]], group = .data[[group]]
  )) +
    facet_layer +
    lapply(groups, function(grp) {
      ggplot2::geom_ribbon(data = preds[preds[[group]] == grp, ], ggplot2::aes(
        xmin = .data[["est"]] - (2 * .data[["se"]]),
        xmax = .data[["est"]] + (2 * .data[["se"]])
      ), fill = virVals[[grp]][1], alpha = 0.5)
    }) +
    lapply(groups, function(grp) {
      ggplot2::geom_ribbon(data = preds[preds[[group]] == grp, ], ggplot2::aes(
        xmin = .data[["est"]] - (1 * .data[["se"]]),
        xmax = .data[["est"]] + (1 * .data[["se"]])
      ), fill = virVals[[grp]][2], alpha = 0.5)
    }) +
    ggplot2::geom_line(ggplot2::aes(color = .data[[group]]), show.legend = FALSE) +
    color_scale +
    ggplot2::scale_y_continuous(labels = scales::label_percent()) +
    ggplot2::labs(x = x, y = "Survival") +
    pcv_theme()
  #* `Add KM Trend`
  # Overlay an empirical survival curve; assumes df has an "event" indicator
  # column — TODO confirm upstream.
  if (!is.null(df)) {
    km_df <- do.call(rbind, lapply(groups, function(grp) {
      sub <- df[df[[group]] == grp, ]
      do.call(rbind, lapply(seq(0, max(df[[x]]), 1), function(ti) {
        # the trailing 0 keeps sum() well-defined when no rows match
        sum_events <- sum(c(sub[as.numeric(sub[[x]]) <= ti, "event"], 0))
        n_at_risk <- nrow(sub) - sum_events
        surv_pct <- n_at_risk / nrow(sub)
        iter <- data.frame(
          group = grp, time = ti, events = sum_events,
          at_risk = n_at_risk, surv_pct = surv_pct
        )
        colnames(iter)[1] <- group
        iter
      }))
    }))
    # Bug fix: km_df stores its time axis in a column literally named "time";
    # mapping x = .data[[x]] errored whenever the model's time variable was
    # not itself called "time".
    p <- p + ggplot2::geom_line(data = km_df, ggplot2::aes(
      x = .data[["time"]],
      y = .data[["surv_pct"]],
      group = .data[[group]],
      linetype = .data[[group]]
    ), color = "black")
  }
  return(p)
}
| 1 |
#' Function for plotting iterations of posterior distributions |
|
| 2 |
#' |
|
| 3 |
#' @param fits A list of brmsfit objects following the same data over time. |
|
| 4 |
#' Currently checkpointing is not supported. |
|
| 5 |
#' @param form A formula describing the growth model similar to \code{\link{growthSS}}
|
|
| 6 |
#' and \code{\link{brmPlot}} such as: outcome ~ predictor |individual/group
|
|
| 7 |
#' @param df data used to fit models (this is used to plot each subject's trend line). |
|
| 8 |
#' @param priors a named list of samples from the prior distributions for each parameter in |
|
| 9 |
#' \code{params}. This is only used if sample_prior=FALSE in the brmsfit object.
|
|
| 10 |
#' If left NULL then no prior is included. |
|
| 11 |
#' @param params a vector of parameters to include distribution plots of. |
|
| 12 |
#' Defaults to NULL which will use all parameters from the top level model. |
|
| 13 |
#' @param maxTime Optional parameter to designate a max time not observed in the models so far |
|
| 14 |
#' @param patch Logical, should a patchwork plot be returned or should lists of ggplots be returned? |
|
| 15 |
#' @keywords Bayesian brms |
|
| 16 |
#' @import ggplot2 |
|
| 17 |
#' @import patchwork |
|
| 18 |
#' @importFrom methods is |
|
| 19 |
#' @importFrom stats setNames |
|
| 20 |
#' @import viridis |
|
| 21 |
#' @return A ggplot or a list of ggplots (depending on patch). |
|
| 22 |
#' @export |
|
| 23 |
#' @examples |
|
| 24 |
#' \donttest{
|
|
| 25 |
#' print(load(url("https://raw.githubusercontent.com/joshqsumner/pcvrTestData/main/brmsFits.rdata")))
|
|
| 26 |
#' library(brms) |
|
| 27 |
#' library(ggplot2) |
|
| 28 |
#' library(patchwork) |
|
| 29 |
#' fits <- list(fit_3, fit_15) |
|
| 30 |
#' form <- y~time | id / group |
|
| 31 |
#' priors <- list( |
|
| 32 |
#' "phi1" = rlnorm(2000, log(130), 0.25), |
|
| 33 |
#' "phi2" = rlnorm(2000, log(12), 0.25), |
|
| 34 |
#' "phi3" = rlnorm(2000, log(3), 0.25) |
|
| 35 |
#' ) |
|
| 36 |
#' params <- c("A", "B", "C")
|
|
| 37 |
#' d <- simdf |
|
| 38 |
#' maxTime <- NULL |
|
| 39 |
#' patch <- TRUE |
|
| 40 |
#' from3to25 <- list( |
|
| 41 |
#' fit_3, fit_5, fit_7, fit_9, fit_11, |
|
| 42 |
#' fit_13, fit_15, fit_17, fit_19, fit_21, fit_23, fit_25 |
|
| 43 |
#' ) |
|
| 44 |
#' distributionPlot( |
|
| 45 |
#' fits = from3to25, form = y ~ time | id / group, |
|
| 46 |
#' params = params, df = d, priors = priors, patch = FALSE
|
| 47 |
#' ) |
|
| 48 |
#' distributionPlot( |
|
| 49 |
#' fits = from3to25, form = y ~ time | id / group, |
|
| 50 |
#' params = params, df = d, patch = FALSE
|
| 51 |
#' ) |
|
| 52 |
#' } |
|
| 53 |
#' ## End(Not run) |
|
| 54 |
distributionPlot <- function(fits, form, df, priors = NULL,
                             params = NULL, maxTime = NULL, patch = TRUE) {
  #* ***** `Reused helper variables`
  parsed_form <- .parsePcvrForm(form, df)
  y <- parsed_form$y
  x <- parsed_form$x
  individual <- parsed_form$individual
  group <- parsed_form$group
  d <- parsed_form$data
  fitData <- fits[[length(fits)]]$data
  dSplit <- split(d, d[[group]])
  startTime <- min(unlist(lapply(fits, function(ft) {
    min(ft$data[[x]], na.rm = TRUE)
  })))
  if (is.null(maxTime)) {
    endTime <- max(unlist(lapply(fits, function(ft) {
      max(ft$data[[x]], na.rm = TRUE)
    })))
  } else {
    # Bug fix: endTime was previously never assigned when maxTime was
    # supplied, so seq() below failed with "object 'endTime' not found".
    endTime <- maxTime
  }
  # average gap between the last observed times of successive fits;
  # NOTE(review): with a single fit this is NaN and seq() will error — the
  # function presumably expects >= 2 fits, confirm with callers.
  byTime <- mean(diff(unlist(lapply(fits, function(ft) {
    max(ft$data[[x]], na.rm = TRUE)
  }))))
  timeRange <- seq(startTime, endTime, byTime)
  virOptions <- c("C", "G", "B", "D", "A", "H", "E", "F")
  # one viridis palette per group, one color per time point
  palettes <- lapply(
    seq_along(unique(fitData[[group]])),
    function(i) {
      viridis::viridis(length(timeRange),
        begin = 0.1,
        end = 1, option = virOptions[i], direction = 1
      )
    }
  )
  names(palettes) <- unique(fitData[[group]])

  #* ***** `if params is null then pull them from growth formula`
  # Strip the time variable and formula punctuation from the model formula's
  # RHS; whatever tokens remain are taken as parameter names.
  if (is.null(params)) {
    fit <- fits[[1]]
    growthForm <- as.character(fit$formula[[1]])[[3]]
    noX <- gsub(x, "", growthForm)
    noExp <- gsub("exp\\(", "", noX)
    noLead <- gsub("\\(1", "", noExp)
    cleaned <- gsub("[/]|[+]|[-]|[)]|[()]", "", noLead)
    params <- strsplit(cleaned, "\\s+")[[1]]
    # (dead leftover debug assignments removed here)
  }

  #* ***** `growth trendline plots`
  # One observed-growth panel per group, colored by time.
  growthTrendPlots <- lapply(seq_along(dSplit), function(i) {
    dt <- dSplit[[i]]
    ggplot2::ggplot(dt, ggplot2::aes(
      x = .data[[x]], y = .data[[y]], color = .data[[x]],
      group = .data[[individual]]
    )) +
      ggplot2::geom_line(show.legend = FALSE) +
      viridis::scale_color_viridis(begin = 0.1, end = 1, option = virOptions[i], direction = 1) +
      ggplot2::scale_x_continuous(limits = c(startTime, endTime)) +
      pcv_theme()
  })

  #* ***** `posterior distribution extraction`
  # For each fit, pull the posterior draws of each parameter and rename the
  # columns to "<param>_<suffix>" where the suffix is the varying part of the
  # original draw column names (typically the group level).
  posts <- do.call(rbind, lapply(fits, function(fit) {
    time <- max(fit$data[[x]], na.rm = TRUE)
    fitDraws <- do.call(cbind, lapply(params, function(par) {
      draws <- as.data.frame(fit)[grepl(par, colnames(as.data.frame(fit)))]
      if (nrow(brms::prior_draws(fit)) > 1) {
        draws <- draws[!grepl("^prior_", colnames(draws))]
      }
      # find the character positions where the column names differ
      splits <- strsplit(colnames(draws), split = "")
      mx <- max(unlist(lapply(splits, length)))
      ind <- which(unlist(lapply(1:mx, function(i) {
        length(unique(rapply(splits, function(j) {
          j[i]
        }))) != 1
      })))
      if (length(ind) > 0) {
        colnames(draws) <- paste(par, unlist(lapply(colnames(draws), function(c) {
          substr(c, min(ind), max(c(ind, nchar(c))))
        })), sep = "_")
      }
      draws
    }))
    # Bug fix: this column was previously hard-coded as "time", but every
    # later step (rbind with prior_df, posts[[x]]) reads it via x.
    fitDraws[[x]] <- time
    fitDraws
  }))

  #* ***** `prior distribution extraction`
  distPlotPriorExtractionRes <- .distPlotPriorExtraction(fits, priors, d, group, params, x)
  prior_df <- distPlotPriorExtractionRes[["prior_df"]]
  USEPRIOR <- distPlotPriorExtractionRes[["UP"]]

  #* ***** `posterior distribution plots`
  #* need to assign ordering of factors
  #* if USEPRIOR then join data, don't make separate geom
  if (USEPRIOR) {
    posts <- rbind(prior_df, posts)
  }
  posts[[x]] <- factor(posts[[x]], levels = sort(as.numeric(unique(posts[[x]]))), ordered = TRUE)

  # shared x-axis limits per parameter so panels are comparable across groups
  xlims <- lapply(params, function(par) {
    vals <- as.numeric(as.matrix(posts[, grepl(paste0("^", par, "_"), colnames(posts))]))
    c(min(vals, na.rm = TRUE), max(vals, na.rm = TRUE))
  })
  names(xlims) <- params
  postPlots <- lapply(unique(fitData[[group]]), function(groupVal) {
    groupPlots <- lapply(params, function(par) {
      p <- ggplot2::ggplot(posts) +
        ggplot2::geom_density(ggplot2::aes(
          x = .data[[paste(par, groupVal, sep = "_")]],
          fill = .data[[x]], color = .data[[x]],
          group = .data[[x]]
        ), alpha = 0.8) +
        ggplot2::labs(x = paste(par, group, groupVal)) +
        ggplot2::coord_cartesian(xlim = xlims[[par]]) +
        pcv_theme() +
        ggplot2::theme(
          axis.text.x.bottom = ggplot2::element_text(angle = 0),
          legend.position = "none", axis.title.y = ggplot2::element_blank()
        )
      # when a prior is plotted it is always the first (black) level
      if (USEPRIOR) {
        p <- p + ggplot2::scale_fill_manual(values = c("black", palettes[[groupVal]])) +
          ggplot2::scale_color_manual(values = c("black", palettes[[groupVal]]))
      } else {
        p <- p + ggplot2::scale_fill_manual(values = palettes[[groupVal]]) +
          ggplot2::scale_color_manual(values = palettes[[groupVal]])
      }
      return(p)
    })
    return(groupPlots)
  })

  # assemble a patchwork grid (one row per group: trend plot + param plots),
  # or return the raw lists of ggplots
  if (patch) {
    ncol_patch <- 1 + length(params)
    nrow_patch <- length(postPlots)
    patchPlot <- growthTrendPlots[[1]] + postPlots[[1]]
    if (length(unique(d[[group]])) > 1) {
      for (i in 2:length(growthTrendPlots)) {
        patchPlot <- patchPlot + growthTrendPlots[[i]] + postPlots[[i]]
      }
    }
    out <- patchPlot + patchwork::plot_layout(ncol = ncol_patch, nrow = nrow_patch)
  } else {
    out <- list(growthTrendPlots, postPlots)
  }
  return(out)
}
|
| 212 | ||
| 213 |
#' Prior extraction in distPlot |
|
| 214 |
#' @keywords internal |
|
| 215 |
#' @noRd |
|
| 216 | ||
| 217 |
.distPlotPriorExtraction <- function(fits, priors, d, group, params, x) {
  # No priors supplied and none sampled: nothing to extract.
  if (is.null(priors)) {
    return(list("prior_df" = NULL, "UP" = FALSE))
  }
  # priors is guaranteed non-NULL past this point, so the old inner
  # `if (!is.null(priors))` check (and its unreachable else branch, which
  # would have errored on an undefined prior_df) has been removed.
  if (all(unlist(lapply(fits, function(fit) nrow(brms::prior_draws(fit)) < 1)))) {
    # no model was fit with sample_prior; build prior_df from the argument
    if (!methods::is(priors[[1]], "list")) {
      # a single prior list applies to every group
      priors <- lapply(seq_along(unique(d[[group]])), function(i) priors)
      names(priors) <- unique(d[[group]])
    }
    # one column per parameter per group, named "<param>_<group>"
    prior_df <- do.call(cbind, lapply(names(priors), function(nm) {
      nmp <- priors[[nm]]
      setNames(data.frame(do.call(cbind, lapply(names(nmp), function(nmpn) {
        nmp[[nmpn]]
      }))), paste0(names(nmp), "_", nm))
    }))
    prior_df[[x]] <- 0 # priors are plotted at time 0
  } else {
    #* `need to fit some models with sample_prior and see how this works with them`
    # at least one model sampled its prior; use the first fit's prior draws
    prior_df <- brms::prior_draws(fits[[1]])
    prior_df <- prior_df[, grepl(paste0("b_", paste0(params, collapse = "|")), colnames(prior_df))]
    colnames(prior_df) <- gsub(group, "", colnames(prior_df))
    colnames(prior_df) <- gsub("^b_", "", colnames(prior_df))
    prior_df[[x]] <- 0 # priors are plotted at time 0
  }
  return(list("prior_df" = prior_df, "UP" = TRUE))
}
| 1 |
#' Function to parse survival model specifications in growthSS for brms type models |
|
| 2 |
#' |
|
| 3 |
#' @param model a survival distribution to use (currently "binomial" and "weibull" are supported) |
|
| 4 |
#' @param form a formula in pcvr syntax |
|
| 5 |
#' @param df a dataframe to use |
|
| 6 |
#' @param priors priors specified per details in growthSS |
|
| 7 |
#' |
|
| 8 |
#' @return A list of elements to pass to fitGrowth |
|
| 9 |
#' |
|
| 10 |
#' @examples |
|
| 11 |
#' set.seed(123) |
|
| 12 |
#' model = "survival weibull" |
|
| 13 |
#' form <- y > 100 ~ time | id / group |
|
| 14 |
#' df <- growthSim("logistic",
|
|
| 15 |
#' n = 20, t = 25, |
|
| 16 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 17 |
#' ) |
|
| 18 |
#' surv <- .survModelParser(model) |
|
| 19 |
#' survivalBool <- surv$survival |
|
| 20 |
#' model <- surv$model |
|
| 21 |
#' prior <- c(0, 5) |
|
| 22 |
#' ss <- .brmsSurvSS(model, form, df, prior) |
|
| 23 |
#' lapply(ss, head) |
|
| 24 |
#' |
|
| 25 |
#' @keywords internal |
|
| 26 |
#' @noRd |
|
| 27 | ||
| 28 |
.brmsSurvSS <- function(model = NULL, form = NULL, df = NULL, priors = NULL) {
  out <- list()
  #* `make survival data`
  # Only reshape the data if the survival bookkeeping columns are missing:
  # binomial models need n_events/n_eligible, weibull models need event/censor.
  fixData <- TRUE
  if (grepl("binomial", model)) {
    if (all(c("n_events", "n_eligible") %in% colnames(df))) {
      fixData <- FALSE
    }
  } else { # weibull
    if (all(c("event", "censor") %in% colnames(df))) {
      fixData <- FALSE
    }
  }
  if (fixData) {
    makeSurvDataRet <- .makeSurvData(df, form, model)
    out_df <- makeSurvDataRet$data
    out[["df"]] <- out_df
  } else {
    # data already in survival format; recover x and group by splitting the
    # formula RHS ("time | id / group") on | and /
    makeSurvDataRet <- list()
    getGroup <- trimws(strsplit(as.character(form)[3], "[|]|[/]")[[1]])
    makeSurvDataRet$group <- getGroup[length(getGroup)]
    makeSurvDataRet$x <- getGroup[1]
    out_df <- df
    out[["df"]] <- out_df
  }

  #* `make bayesian formula`
  # NOTE(review): a model string other than exactly "binomial"/"weibull"
  # would leave form_ret undefined here — presumably .survModelParser
  # guarantees one of the two upstream; confirm.
  if (model == "binomial") {
    form_ret <- .brmsurv_binomial_formula(
      x = makeSurvDataRet$x, group = makeSurvDataRet$group,
      df = out_df
    )
  } else if (model == "weibull") {
    form_ret <- .brmsurv_weibull_formula(
      x = makeSurvDataRet$x, group = makeSurvDataRet$group,
      df = out_df
    )
  }
  out[["family"]] <- form_ret$family
  out[["formula"]] <- form_ret$formula
  #* `make priors if none specified`
  out[["prior"]] <- .brmsMakeSurvPriors(priors, out_df, makeSurvDataRet, form_ret)
  #* `set initialization to 0 for all chains`
  out[["initfun"]] <- 0
  out[["pcvrForm"]] <- form
  return(out)
}
|
| 75 | ||
| 76 | ||
| 77 |
#' Helper function to make priors for brms survival models |
|
| 78 |
#' @return A list with a formula and a model family |
|
| 79 |
#' @keywords internal |
|
| 80 |
#' @noRd |
|
| 81 | ||
| 82 |
.brmsMakeSurvPriors <- function(priors, out_df, makeSurvDataRet, form_ret) {
  #' @description
  #' Turn the user-supplied `priors` argument into a brmsprior object for a
  #' brms survival model. NULL gets a default prior, an existing brmsprior is
  #' passed through, and numerics are recycled into per-group (mu, sd) pairs.
  grps <- unique(out_df[[makeSurvDataRet$group]])
  if (is.null(priors)) {
    # default weakly informative prior on all coefficients
    return(brms::prior_string("normal(0, 5)", class = "b"))
  } else if (any(methods::is(priors, "brmsprior"))) {
    # already a brms prior specification, use as-is
    return(priors)
  } else if (is.numeric(priors)) {
    # recycle the numeric vector to one (mu, sd) pair per group
    priors <- rep(priors, length.out = 2 * length(grps))
    priors <- stats::setNames(
      lapply(seq(1, length(priors), 2), function(i) {
        c(priors[i], priors[i + 1])
      }),
      grps
    )
    message(
      "Prior is numeric, replicating to ", length(grps),
      " length 2 elements (mu, sd) and assuming order ",
      paste(grps, collapse = ", ")
    )
  }
  #* `match priors to the model's coefficient names`
  pars <- brms::get_prior(formula = form_ret$formula, data = out_df, family = form_ret$family)$coef
  pars <- pars[which(nchar(pars) > 0)]
  if (length(priors) != length(pars)) {
    message(paste0(
      "Priors and parameters are not the same length. Output will assume that priors are for groups",
      " and are in order: ", paste(grps, collapse = ", ")
    ))
    priors <- stats::setNames(rep(priors, length.out = length(grps)), grps)
  }
  #* `build one normal(mu, sd) prior per matching coefficient`
  combined <- NULL
  for (g in grps) {
    # NOTE(review): substring match on coefficient names; assumes no group
    # level is a prefix of another level — confirm for such designs.
    matching_pars <- pars[grepl(paste0(makeSurvDataRet$group, g), pars)]
    for (param in matching_pars) {
      single <- brms::set_prior(
        prior = paste0("normal(", priors[[g]][1], ",", priors[[g]][2], ")"),
        coef = param
      )
      combined <- if (is.null(combined)) single else combined + single
    }
  }
  return(combined)
}
|
| 134 | ||
| 135 | ||
| 136 |
#' Helper function to make formulas for brms binomial survival models |
|
| 137 |
#' @return A list with a formula and a model family |
|
| 138 |
#' @keywords internal |
|
| 139 |
#' @noRd |
|
| 140 | ||
| 141 |
.brmsurv_binomial_formula <- function(y = "n_events", x = "time", total = "n_eligible",
                                      group = "groups", df = NULL) {
  #' @description
  #' Build the brms formula and family for a binomial survival model.
  #' Single-group (or group-less) data gets an intercept-free time-only model,
  #' otherwise the event rate varies by time-within-group.
  multi_group <- !is.null(group) && length(unique(df[[group]])) != 1
  if (multi_group) {
    formula <- stats::as.formula(paste0(y, " | trials(", total, ") ~ 0 + ", x, ":", group))
  } else {
    formula <- stats::as.formula(paste0(y, " | trials(", total, ") ~ 0 + ", x))
  }
  # binomial family with default link functions
  return(list("formula" = formula, "family" = "binomial"))
}
|
| 153 | ||
| 154 |
#' Helper function to make formulas for brms weibull survival models |
|
| 155 |
#' @return A list with a formula and a model family |
|
| 156 |
#' @keywords internal |
|
| 157 |
#' @noRd |
|
| 158 | ||
| 159 |
.brmsurv_weibull_formula <- function(y = "event", x = "time", censor = "censor",
                                     group = "groups", df = NULL) {
  #' @description
  #' Build the brms formula and family for a weibull time-to-event model with
  #' right censoring. The response is the time column `x` with a censoring
  #' indicator; the `y` argument is not referenced in this helper.
  single_group <- is.null(group) || length(unique(df[[group]])) == 1
  rhs <- if (single_group) "1" else paste0("0 + ", group)
  formula <- stats::as.formula(paste0(x, " | cens(", censor, ") ~ ", rhs))
  # weibull family with default link functions
  return(list("formula" = formula, "family" = "weibull"))
}
|
| 171 | ||
| 172 | ||
| 173 | ||
| 174 | ||
| 175 |
#' Function to parse survival model specifications in growthSS for modeling in the survival package |
|
| 176 |
#' |
|
| 177 |
#' @return a list of components passed to fitGrowth |
|
| 178 |
#' |
|
| 179 |
#' @examples |
|
| 180 |
#' |
|
| 181 |
#' set.seed(123) |
|
| 182 |
#' model = "survival weibull" |
|
| 183 |
#' form <- y > 100 ~ time | id / group |
|
| 184 |
#' df <- growthSim("logistic",
|
|
| 185 |
#' n = 20, t = 25, |
|
| 186 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 187 |
#' ) |
|
| 188 |
#' surv <- .survModelParser(model) |
|
| 189 |
#' survivalBool <- surv$survival |
|
| 190 |
#' model <- surv$model |
|
| 191 |
#' ss <- .survSS(model, form, df) |
|
| 192 |
#' lapply(ss, head) |
|
| 193 |
#' |
|
| 194 |
#' @importFrom survival survreg Surv |
|
| 195 |
#' |
|
| 196 |
#' @keywords internal |
|
| 197 |
#' @noRd |
|
| 198 | ||
| 199 |
.survSS <- function(model = NULL, form = NULL, df = NULL) {
  #' @description
  #' Parse a survival model specification into components (data, formula,
  #' distribution) for fitting with survival::survreg. Internal to growthSS.
  out <- list()
  #* `make survival data`
  # An existing "event" column means the data is already survival-formatted.
  if ("event" %in% colnames(df)) {
    survDataList <- list()
    rhs_parts <- trimws(strsplit(as.character(form)[3], "[|]|[/]")[[1]])
    survDataList$group <- rhs_parts[length(rhs_parts)]
    survDataList$x <- rhs_parts[1]
    out[["df"]] <- df
  } else {
    survDataList <- .makeSurvData(df, form, model = "weibull")
    out_df <- survDataList$data
    out_df[[survDataList$group]] <- factor(out_df[[survDataList$group]])
    # keep the numeric factor codes alongside the labels
    out_df[[paste0(survDataList$group, "_numericLabel")]] <- unclass(out_df[[survDataList$group]])
    out[["df"]] <- out_df
  }
  #* `make survreg formula`
  x <- survDataList$x
  group <- survDataList$group
  # NOTE(review): the formula hardcodes the literal "group" and "event" column
  # names, so the prepared data is assumed to use exactly those names —
  # confirm against .makeSurvData and callers.
  if (is.null(group) || length(unique(df[[group]])) == 1) {
    formula <- stats::as.formula(paste0("Surv(", x, ", event) ~ 1"))
  } else {
    formula <- stats::as.formula(paste0("Surv(", x, ", event) ~ 1 + group"))
  }
  out[["formula"]] <- formula
  #* `return all`
  out[["pcvrForm"]] <- form
  out[["distribution"]] <- model
  return(out)
}
|
| 233 | ||
| 234 | ||
| 235 |
#' Function to parse survival model specifications in growthSS for modeling in the flexsurv package |
|
| 236 |
#' |
|
| 237 |
#' @return a list of components passed to fitGrowth |
|
| 238 |
#' |
|
| 239 |
#' @examples |
|
| 240 |
#' |
|
| 241 |
#' set.seed(123) |
|
| 242 |
#' model = "survival gengamma" |
|
| 243 |
#' form <- y > 100 ~ time | id / group |
|
| 244 |
#' df <- growthSim("logistic",
|
|
| 245 |
#' n = 20, t = 25, |
|
| 246 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 247 |
#' ) |
|
| 248 |
#' surv <- .survModelParser(model) |
|
| 249 |
#' survivalBool <- surv$survival |
|
| 250 |
#' model <- surv$model |
|
| 251 |
#' ss <- .flexSurvSS(model, form, df) |
|
| 252 |
#' lapply(ss, head) |
|
| 253 |
#' |
|
| 254 |
#' @importFrom survival survreg Surv |
|
| 255 |
#' |
|
| 256 |
#' @keywords internal |
|
| 257 |
#' @noRd |
|
| 258 | ||
| 259 |
.flexSurvSS <- function(model = NULL, form = NULL, df = NULL, anc = NULL) {
  #' @description
  #' Parse a survival model specification into components (data, formulas,
  #' distribution) for fitting with flexsurv::flexsurvreg. Internal to growthSS.
  out <- list()
  #* `validate requested distribution`
  supported <- c(
    "gengamma", "gengamma.orig", "genf", "genf.orig",
    "weibull", "gamma", "exp", "llogis", "lnorm", "gompertz",
    "exponential", "lognormal"
  )
  if (!model %in% supported) {
    stop(paste0(
      "Supported distributions for flexsurv models are ",
      paste(supported, collapse = ", "),
      ".\nIf you are using a custom distribution please call flexsurvreg directly."
    ))
  }
  #* `make survival data`
  # An existing "event" column means the data is already survival-formatted.
  if ("event" %in% colnames(df)) {
    survDataList <- list()
    rhs_parts <- trimws(strsplit(as.character(form)[3], "[|]|[/]")[[1]])
    survDataList$group <- rhs_parts[length(rhs_parts)]
    survDataList$x <- rhs_parts[1]
    out[["df"]] <- df
  } else {
    survDataList <- .makeSurvData(df, form, model = "weibull")
    out_df <- survDataList$data
    out_df[[survDataList$group]] <- factor(out_df[[survDataList$group]])
    # keep the numeric factor codes alongside the labels
    out_df[[paste0(survDataList$group, "_numericLabel")]] <- unclass(out_df[[survDataList$group]])
    out[["df"]] <- out_df
  }
  #* `make main survival formula`
  x <- survDataList$x
  group <- survDataList$group
  # NOTE(review): hardcoded "group"/"event" column names, as in .survSS —
  # confirm the prepared data uses those literal names.
  if (is.null(group) || length(unique(df[[group]])) == 1) {
    f1 <- stats::as.formula(paste0("Surv(", x, ", event) ~ 1"))
  } else {
    f1 <- stats::as.formula(paste0("Surv(", x, ", event) ~ 1 + group"))
  }
  out[["formula"]][["f1"]] <- f1
  #* `collect additional (ancillary) formulas`
  # assigning NULL to a list element is a no-op, so "f2" only exists when
  # `anc` was supplied
  if (!is.null(anc)) {
    out[["formula"]][["f2"]] <- anc
  } else {
    out[["formula"]][["f2"]] <- NULL
  }
  #* `return all`
  out[["pcvrForm"]] <- form
  out[["distribution"]] <- model
  return(out)
}
| 1 |
#' Ease of use nlrq starter function for 6 growth model parameterizations |
|
| 2 |
#' |
|
| 3 |
#' Internal to growthSS |
|
| 4 |
#' |
|
| 5 |
#' @examples |
|
| 6 |
#' |
|
| 7 |
#' simdf <- growthSim("logistic",
|
|
| 8 |
#' n = 20, t = 25, |
|
| 9 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 10 |
#' ) |
|
| 11 |
#' |
|
| 12 |
#' ss <- .mgcvSS(model = "gam", form = y ~ time | id / group, df = simdf) |
|
| 13 |
#' names(ss) # formula, df, pcvrForm |
|
| 14 |
#' |
|
| 15 |
#' @keywords internal |
|
| 16 |
#' @noRd |
|
| 17 | ||
| 18 |
.mgcvSS <- function(model = "gam", form, df) {
  #' @description
  #' Assemble a gam formula and prepared data for mgcv models. Internal to
  #' growthSS; the individual-level term of `form` is ignored for gams.
  #* `parse form argument`
  parsed <- .parsePcvrForm(form, df)
  y <- parsed$y
  x <- parsed$x
  group <- parsed$group
  use_group <- parsed$USEG
  if (parsed$USEID) {
    message("Individual is not used with type = 'gam'.")
  }
  df <- parsed$data
  #* `assemble gam formula`
  if (use_group) {
    df[[group]] <- factor(df[[group]])
    # keep the numeric factor codes alongside the labels
    df[[paste0(group, "_numericLabel")]] <- unclass(df[[group]])
    # per-group smooth over x plus group fixed effects, no global intercept
    gam_form <- stats::as.formula(paste0(y, "~0+", group, "+s(", x, ", by=", group, ")"))
  } else {
    gam_form <- stats::as.formula(paste0(y, "~0+s(", x, ")"))
  }
  #* `return list`
  return(list("formula" = gam_form, "df" = df, "pcvrForm" = form))
}
| 1 |
#' Remove outliers from bellwether data using cook's distance |
|
| 2 |
#' |
|
| 3 |
#' @param df Data frame to use. Can be in long or wide format. |
|
| 4 |
#' @param phenotype Column to use to classify outliers. If this is length > 1 then |
|
| 5 |
#' it is taken as the multi-value traits to use. See examples. |
|
| 6 |
#' @param naTo0 Logical, should NA values to changed to 0. |
|
| 7 |
#' @param group Grouping variables to find outliers as a character vector. |
|
| 8 |
#' This is typically time and design variables (DAS, genotype, treatment, etc). |
|
| 9 |
#' These are used as predictors for `phenotype` in a generalized linear model. |
|
| 10 |
#' @param cutoff Cutoff for something being an "outlier" expressed as a multiplier |
|
| 11 |
#' on the mean of Cooks Distance for this data. This defaults to 5, with higher values |
|
| 12 |
#' being more conservative (removing less of the data). |
|
| 13 |
#' @param outlierMethod Method to be used in detecting outliers. |
|
| 14 |
#' Currently "cooks" and "mahalanobis" distances are supported, with "mahalanobis" only |
|
| 15 |
#' being supported for multi-value traits. |
|
| 16 |
#' @param plotgroup Grouping variables for drawing plots if plot=TRUE. |
|
| 17 |
#' Typically this is an identifier for images of a plant |
|
| 18 |
#' over time and defaults to c('barcode',"rotation").
|
|
| 19 |
#' @param plot Logical, if TRUE then a list is returned with a ggplot and a dataframe. |
|
| 20 |
#' @param x Optional specification for x axis variable if plot is true. |
|
| 21 |
#' If left NULL (the default) then the first element of `group` is used. |
|
| 22 |
#' @param traitCol Column with phenotype names, defaults to "trait". |
|
| 23 |
#' This should generally not need to be changed from the default. |
|
| 24 |
#' If this and valueCol are present in colnames(df) then the data |
|
| 25 |
#' is assumed to be in long format. |
|
| 26 |
#' @param valueCol Column with phenotype values, defaults to "value". |
|
| 27 |
#' This should generally not need to be changed from the default. |
|
| 28 |
#' @param labelCol Column with phenotype labels for long data, defaults to "label". |
|
| 29 |
#' This should generally not need to be changed from the default. |
|
| 30 |
#' @param idCol Column(s) that identify individuals over time. |
|
| 31 |
#' Defaults to plotGroup. |
|
| 32 |
#' @param ncp Optionally specify the number of principle components to be used for MV data outlier |
|
| 33 |
#' detection with cooks distance. If left NULL (the default) then 3 will be used. |
|
| 34 |
#' @param separate Optionally separate the data by some variable to speed up the modeling step. |
|
| 35 |
#' If you have a design variable with |
|
| 36 |
#' very many levels then it may be helpful to separate by that variable. |
|
| 37 |
#' Note this will subset the data for each model so it will change |
|
| 38 |
#' the outlier removal (generally to be more conservative). |
|
| 39 |
#' @keywords ggplot outliers |
|
| 40 |
#' @import ggplot2 |
|
| 41 |
#' @import data.table |
|
| 42 |
#' @importFrom stats complete.cases cooks.distance glm as.formula lm mahalanobis cov |
|
| 43 |
#' @examples |
|
| 44 |
#' |
|
| 45 |
#' |
|
| 46 |
#' sv <- growthSim("logistic",
|
|
| 47 |
#' n = 5, t = 20, |
|
| 48 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 49 |
#' ) |
|
| 50 |
#' sv[130, ]$y <- 500 |
|
| 51 |
#' sv_res <- bw.outliers( |
|
| 52 |
#' df = sv, phenotype = "y", naTo0 = FALSE, cutoff = 15, |
|
| 53 |
#' group = c("time", "group"), outlierMethod = "cooks",
|
|
| 54 |
#' plotgroup = "id", plot = TRUE |
|
| 55 |
#' ) |
|
| 56 |
#' sv_res$plot |
|
| 57 |
#' \donttest{
|
|
| 58 |
#' library(data.table) |
|
| 59 |
#' mvw <- read.pcv(paste0( |
|
| 60 |
#' "https://media.githubusercontent.com/media/joshqsumner/", |
|
| 61 |
#' "pcvrTestData/main/pcv4-multi-value-traits.csv" |
|
| 62 |
#' ), mode = "wide", reader = "fread") |
|
| 63 |
#' mvw$genotype <- substr(mvw$barcode, 3, 5) |
|
| 64 |
#' mvw$genotype <- ifelse(mvw$genotype == "002", "B73", |
|
| 65 |
#' ifelse(mvw$genotype == "003", "W605S", |
|
| 66 |
#' ifelse(mvw$genotype == "004", "MM", "Mo17") |
|
| 67 |
#' ) |
|
| 68 |
#' ) |
|
| 69 |
#' mvw$fertilizer <- substr(mvw$barcode, 8, 8) |
|
| 70 |
#' mvw$fertilizer <- ifelse(mvw$fertilizer == "A", "100", |
|
| 71 |
#' ifelse(mvw$fertilizer == "B", "50", "0") |
|
| 72 |
#' ) |
|
| 73 |
#' mvw <- bw.time(mvw, timeCol = "timestamp", group = "barcode", plot = FALSE) |
|
| 74 |
#' |
|
| 75 |
#' phenotypes <- which(grepl("hue_freq", colnames(mvw)))
|
|
| 76 |
#' |
|
| 77 |
#' mvw2 <- bw.outliers( |
|
| 78 |
#' df = mvw, phenotype = phenotypes, naTo0 = FALSE, outlierMethod = "cooks", |
|
| 79 |
#' group = c("DAS", "genotype", "fertilizer"), cutoff = 3, plotgroup = c("barcode", "rotation")
|
|
| 80 |
#' ) |
|
| 81 |
#' |
|
| 82 |
#' |
|
| 83 |
#' mvl <- read.pcv(paste0( |
|
| 84 |
#' "https://media.githubusercontent.com/media/joshqsumner/", |
|
| 85 |
#' "pcvrTestData/main/pcv4-multi-value-traits.csv" |
|
| 86 |
#' ), mode = "long") |
|
| 87 |
#' mvl$genotype <- substr(mvl$barcode, 3, 5) |
|
| 88 |
#' mvl$genotype <- ifelse(mvl$genotype == "002", "B73", |
|
| 89 |
#' ifelse(mvl$genotype == "003", "W605S", |
|
| 90 |
#' ifelse(mvl$genotype == "004", "MM", "Mo17") |
|
| 91 |
#' ) |
|
| 92 |
#' ) |
|
| 93 |
#' mvl$fertilizer <- substr(mvl$barcode, 8, 8) |
|
| 94 |
#' mvl$fertilizer <- ifelse(mvl$fertilizer == "A", "100", |
|
| 95 |
#' ifelse(mvl$fertilizer == "B", "50", "0") |
|
| 96 |
#' ) |
|
| 97 |
#' mvl <- bw.time(mvl, timeCol = "timestamp", group = "barcode", plot = FALSE) |
|
| 98 |
#' |
|
| 99 |
#' mvl2 <- bw.outliers( |
|
| 100 |
#' df = mvl, phenotype = "hue_frequencies", naTo0 = FALSE, outlierMethod = "cooks", |
|
| 101 |
#' group = c("DAS", "genotype", "fertilizer"), cutoff = 3, plotgroup = c("barcode", "rotation")
|
|
| 102 |
#' ) |
|
| 103 |
#' } |
|
| 104 |
#' |
|
| 105 |
#' @return The input dataframe with outliers removed and optionally a plot |
|
| 106 |
#' (if a plot is returned then output is a list). |
|
| 107 |
#' @export |
|
| 108 | ||
| 109 | ||
| 110 |
bw.outliers <- function(df = NULL,
                        phenotype,
                        naTo0 = FALSE,
                        group = c(),
                        cutoff = 3,
                        outlierMethod = "cooks",
                        plotgroup = c("barcode", "rotation"),
                        plot = TRUE, x = NULL, traitCol = "trait", valueCol = "value",
                        labelCol = "label", idCol = NULL, ncp = NULL, separate = NULL) {
  # Wide vs long layout is inferred from the presence of traitCol/valueCol.
  wide <- .detectWideData(df, traitCol, valueCol)
  if (is.null(phenotype)) {
    stop("A phenotype must be provided")
  }

  # Multi-value (mv) data: several phenotype columns in wide format, or more
  # than one trait/label combination for the phenotype in long format.
  if ((wide && length(phenotype) > 1) ||
    (!wide && length(unique(
      interaction(df[df[[traitCol]] == phenotype, colnames(df) %in% c(traitCol, labelCol)])
    )) > 1)) {
    mv <- TRUE
  } else {
    mv <- FALSE
  }

  # By default individuals over time are identified by the plotting grouping.
  if (is.null(idCol)) {
    idCol <- plotgroup
  }

  # Optionally split by `separate` so each model sees a smaller subset; the
  # separating variable is then dropped from the model predictors.
  if (!is.null(separate)) {
    dfList <- split(df, df[[separate]])
    group <- group[!grepl(separate, group)]
  } else {
    dfList <- list(df)
  }

  # Dispatch to the layout/method-specific outlier worker per subset.
  resList <- .applyOutlierMethod(
    dfList, wide, mv, outlierMethod,
    naTo0, phenotype, group, cutoff,
    ncp, traitCol, valueCol, labelCol, idCol
  )

  # Recombine labeled data and per-subset removal percentages.
  df <- do.call(rbind, lapply(resList, function(res) {
    res[["data"]]
  }))
  pctRm <- do.call(rbind, lapply(seq_along(resList), function(i) {
    data.frame(i = i, pctRm = resList[[i]][["pctRm"]])
  }))

  # Keep non-outlier rows and drop all helper columns containing "outlier".
  out <- df[which(!df$outlier), -which(grepl("outlier", colnames(df)))]

  # Warn when entire design-variable interactions were emptied by removal.
  removedInteractions <- do.call(rbind, lapply(
    resList,
    function(d) {
      tab <- as.data.frame(table(d$data[, c(separate, group)]))
      colnames(tab) <- c(separate, group, "Freq")
      return(tab[tab$Freq == 0, ])
    }
  ))
  if (nrow(removedInteractions) > 0) {
    warning(paste0(nrow(removedInteractions), " groupings had all observations removed"))
  }

  # If plotting, return a list of the cleaned data and a diagnostic plot of
  # removed observations; otherwise return the cleaned data frame alone.
  if (plot) {
    p <- .outlierPlottingHelper(wide, mv, df, plotgroup, group, x, phenotype, traitCol, valueCol, pctRm)
    out <- list("data" = out, "plot" = p)
  }

  return(out)
}
|
| 178 | ||
| 179 | ||
| 180 |
#' *********************************************************************************************** |
|
| 181 |
#' *************** `Apply outlier methods` **************************************** |
|
| 182 |
#' *********************************************************************************************** |
|
| 183 |
#' @description |
|
| 184 |
#' Internal function to apply outlier methods |
|
| 185 |
#' |
|
| 186 |
#' @keywords internal |
|
| 187 |
#' @noRd |
|
| 188 | ||
| 189 |
.applyOutlierMethod <- function(dfList, wide, mv, outlierMethod,
                                naTo0, phenotype, group, cutoff,
                                ncp, traitCol, valueCol, labelCol, idCol) {
  #' @description
  #' Dispatch each data subset to the layout/method-specific outlier worker,
  #' e.g. ".wide_sv_cooks_outliers". The worker name depends only on the
  #' layout flags and method, so it is assembled once up front.
  fun_name <- paste(
    if (wide) ".wide" else ".long",
    if (mv) "mv" else "sv",
    outlierMethod, "outliers",
    sep = "_"
  )
  resList <- lapply(dfList, function(chunk) {
    # lookup stays inside the loop so an empty dfList never resolves the name
    worker <- get(fun_name)
    worker(chunk, naTo0, phenotype, group, cutoff, ncp,
           traitCol, valueCol, labelCol, idCol)
  })
  return(resList)
}
|
| 209 | ||
| 210 | ||
| 211 |
#' *********************************************************************************************** |
|
| 212 |
#' *************** `wide data detection` **************************************** |
|
| 213 |
#' *********************************************************************************************** |
|
| 214 |
#' @description |
|
| 215 |
#' Internal function for outlier detection in wide SV data. |
|
| 216 |
#' |
|
| 217 |
#' @keywords internal |
|
| 218 |
#' @noRd |
|
| 219 | ||
| 220 |
.detectWideData <- function(df, traitCol, valueCol) {
  #' @description
  #' Long-format data carries both a trait-name column and a value column;
  #' any other layout is treated as wide. Returns TRUE for wide data.
  has_long_cols <- all(c(traitCol, valueCol) %in% colnames(df))
  return(!has_long_cols)
}
|
| 228 | ||
| 229 | ||
| 230 |
#' *********************************************************************************************** |
|
| 231 |
#' *************** `plotting helper` **************************************** |
|
| 232 |
#' *********************************************************************************************** |
|
| 233 |
#' @description |
|
| 234 |
#' Internal function for outlier detection in wide SV data. |
|
| 235 |
#' |
|
| 236 |
#' @keywords internal |
|
| 237 |
#' @noRd |
|
| 238 | ||
| 239 |
.outlierPlottingHelper <- function(wide, mv, df, plotgroup, group,
                                   x, phenotype, traitCol, valueCol, pctRm) {
  #' @description
  #' Build the diagnostic plot for bw.outliers: retained observations drawn as
  #' lines (single-value data) or bars (multi-value data) with removed
  #' observations overlaid in red, faceted by the non-x grouping variables.
  #' One branch per combination of wide/long layout and sv/mv trait type.
  #'
  #' Fixes vs previous version: removed `outPlotData` locals that were
  #' computed but never used in the two single-value branches, and the wide-MV
  #' branch now reads the melted trait column via `traitCol` instead of the
  #' hardcoded `$trait` name (melt() names that column after `traitCol`).
  if (is.null(x)) {
    x <- group[1] # default x axis to the first grouping variable
  }
  if (wide && !mv) {
    df$grouping <- interaction(df[, plotgroup])
    rmdfPlotData <- df[df$outlier, ]
    # note: the line layer uses the full df (outlier rows included)
    p <- ggplot2::ggplot() +
      ggplot2::facet_wrap(stats::as.formula(paste0("~", paste(group[-1], collapse = "+")))) +
      ggplot2::geom_line(data = df, ggplot2::aes(
        x = .data[[x]], y = .data[[phenotype]],
        group = .data[["grouping"]]
      ), linewidth = 0.25) +
      ggplot2::labs(title = paste0("~", round(mean(pctRm$pctRm), 3), "% Removed")) +
      pcv_theme()

    # capture the y range before adding the red points so extreme outliers do
    # not stretch the axis
    yLims <- ggplot2::layer_scales(p)$y$range$range

    p <- p +
      ggplot2::geom_point(
        data = rmdfPlotData, ggplot2::aes(
          x = .data[[x]],
          y = .data[[phenotype]]
        ),
        color = "red", size = 0.5
      ) +
      ggplot2::coord_cartesian(ylim = yLims)
  } else if (!wide && !mv) {
    plotdf <- df[df[[traitCol]] == phenotype, ]
    plotdf$grouping <- interaction(plotdf[, plotgroup])
    rmdfPlotData <- plotdf[plotdf$outlier, ]
    p <- ggplot2::ggplot() +
      ggplot2::facet_wrap(stats::as.formula(paste0("~", paste(group[-1], collapse = "+")))) +
      ggplot2::geom_line(data = plotdf, ggplot2::aes(
        x = .data[[x]], y = .data[[valueCol]],
        group = .data[["grouping"]]
      ), linewidth = 0.25) +
      ggplot2::labs(title = paste0("~", round(mean(pctRm$pctRm), 3), "% Removed")) +
      pcv_theme()

    # capture the y range before adding the red points (see wide/sv branch)
    yLims <- ggplot2::layer_scales(p)$y$range$range

    p <- p +
      ggplot2::geom_point(
        data = rmdfPlotData, ggplot2::aes(
          x = .data[[x]],
          y = .data[[valueCol]]
        ),
        color = "red", size = 0.5
      ) +
      ggplot2::coord_cartesian(ylim = yLims)
  } else if (wide && mv) {
    # melt the mv trait columns to long format for histogram-style plotting
    plotdf <- suppressWarnings(
      as.data.frame(
        data.table::melt(data.table::as.data.table(df),
          measure.vars = phenotype,
          variable.name = traitCol,
          value.name = valueCol
        )
      )
    )

    # numeric bin position parsed from the trait name (e.g. "hue_freq_12")
    plotdf$bin <- as.numeric(regmatches(plotdf[[traitCol]], regexpr("[0-9]+", plotdf[[traitCol]])))

    plotdf$grouping <- interaction(plotdf[, plotgroup])
    outPlotData <- plotdf[!plotdf$outlier, ]
    rmdfPlotData <- plotdf[plotdf$outlier, ]

    p <- ggplot2::ggplot() +
      ggplot2::facet_wrap(stats::as.formula(paste0("~", paste(group[-1], collapse = "+")))) +
      ggplot2::geom_col(
        data = rmdfPlotData, ggplot2::aes(x = .data[["bin"]], y = .data[[valueCol]]),
        position = "identity",
        fill = "red", alpha = 0.25
      ) +
      ggplot2::geom_col(
        data = outPlotData, ggplot2::aes(x = .data[["bin"]], y = .data[[valueCol]]),
        position = "identity",
        alpha = 0.25
      ) +
      ggplot2::labs(title = paste0("~", round(mean(pctRm$pctRm), 3), "% Removed")) +
      pcv_theme()
  } else if (!wide && mv) {
    plotdf <- df
    plotdf$grouping <- interaction(plotdf[, plotgroup])
    outPlotData <- plotdf[!plotdf$outlier, ]
    rmdfPlotData <- plotdf[plotdf$outlier, ]

    p <- ggplot2::ggplot() +
      ggplot2::facet_wrap(stats::as.formula(paste0("~", paste(group[-1], collapse = "+")))) +
      ggplot2::geom_col(
        data = rmdfPlotData, ggplot2::aes(x = .data[[traitCol]], y = .data[[valueCol]]),
        position = "identity",
        fill = "red", alpha = 0.25
      ) +
      ggplot2::geom_col(
        data = outPlotData, ggplot2::aes(x = .data[[traitCol]], y = .data[[valueCol]]),
        position = "identity",
        alpha = 0.25
      ) +
      ggplot2::labs(title = paste0("~", round(mean(pctRm$pctRm), 3), "% Removed")) +
      pcv_theme()
  }
  return(p)
}
|
| 347 | ||
| 348 | ||
| 349 |
#' *********************************************************************************************** |
|
| 350 |
#' *************** `wide SV cooks distance` **************************************** |
|
| 351 |
#' *********************************************************************************************** |
|
| 352 |
#' @description |
|
| 353 |
#' Internal function for outlier detection in wide SV data. |
|
| 354 |
#' |
|
| 355 |
#' @keywords internal |
|
| 356 |
#' @noRd |
|
| 357 | ||
| 358 |
.wide_sv_cooks_outliers <- function(df, naTo0, phenotype, group, cutoff, ncp,
                                    traitCol, valueCol, labelCol, idCol) {
  #' @description
  #' Flag wide-format single-value rows whose Cook's distance, under a glm of
  #' the phenotype on the interaction of the grouping factors, exceeds
  #' `cutoff` times the mean distance. Returns the labeled data and the
  #' percent of rows flagged.
  if (naTo0) {
    df[[phenotype]][is.na(df[[phenotype]])] <- 0
  }
  df <- df[complete.cases(df[, c(phenotype, group)]), ]
  model_formula <- paste(
    "as.numeric(", phenotype, ")~",
    paste(paste0("as.factor(", group, ")"), collapse = ":")
  )
  cooksd <- cooks.distance(glm(data = df, as.formula(model_formula)))
  threshold <- cutoff * mean(cooksd, na.rm = TRUE)
  # keep NA distances by placing them just under the threshold
  cooksd[is.na(cooksd)] <- threshold - 0.1
  df <- cbind(df, data.frame("outlier" = cooksd))
  df$outlier <- df$outlier > threshold
  pctRm <- 100 * (round(nrow(df[df$outlier, ]) / nrow(df), 5))

  return(list("data" = df, "pctRm" = pctRm))
}
|
| 377 | ||
| 378 | ||
| 379 |
#' *********************************************************************************************** |
|
| 380 |
#' *************** `long SV cooks distance` **************************************** |
|
| 381 |
#' *********************************************************************************************** |
|
| 382 |
#' @description |
|
| 383 |
#' Internal function for outlier detection in wide SV data. |
|
| 384 |
#' |
|
| 385 |
#' @keywords internal |
|
| 386 |
#' @noRd |
|
| 387 | ||
| 388 |
.long_sv_cooks_outliers <- function(df, naTo0, phenotype, group, cutoff, ncp,
                                    traitCol, valueCol, labelCol, idCol) {
  #' @description
  #' Cook's distance outlier detection for long-format single-value traits.
  #' Outlier calls are made per (group, id) combination on the phenotype's
  #' rows and then merged back onto every row of the input.
  if (naTo0) {
    df[df[[traitCol]] == phenotype, valueCol][is.na(df[df[[traitCol]] == phenotype, valueCol])] <- 0
  }
  # NOTE(review): complete.cases() is computed on the phenotype subset but
  # combined with a full-length logical vector; the lengths only agree when
  # every row carries the target phenotype — confirm intent for mixed input.
  pheno_rows <- df[[traitCol]] == phenotype
  subdf <- df[complete.cases(df[pheno_rows, c(valueCol, traitCol, group)]) & pheno_rows, ]
  model_formula <- paste(
    "as.numeric(", valueCol, ")~",
    paste(paste0("as.factor(", group, ")"), collapse = ":")
  )
  cooksd <- cooks.distance(glm(data = subdf, as.formula(model_formula)))
  threshold <- cutoff * mean(cooksd, na.rm = TRUE)
  # keep NA distances by placing them just under the threshold
  cooksd[is.na(cooksd)] <- threshold - 0.1
  subdf <- cbind(subdf, data.frame("outlier" = cooksd))
  # collapse to one call per (group, id) combination
  subdf <- subdf[, c(group, idCol, "outlier")]
  subdf <- subdf[!duplicated(subdf[, c(group, idCol)]), ]
  subdf$outlier <- subdf$outlier > threshold
  pctRm <- 100 * (round(nrow(subdf[subdf$outlier, ]) / nrow(subdf), 5))
  #* label all phenotype rows by merging the calls back on (group, id)
  df <- merge(df, subdf, all.x = TRUE)
  return(list("data" = df, "pctRm" = pctRm))
}
|
| 413 | ||
| 414 |
#' *********************************************************************************************** |
|
| 415 |
#' *************** `wide MV cooks distance` **************************************** |
|
| 416 |
#' *********************************************************************************************** |
|
| 417 |
#' @description |
|
| 418 |
#' Internal function for outlier detection in wide MV data. |
|
| 419 |
#' |
|
| 420 |
#' @keywords internal |
|
| 421 |
#' @noRd |
|
| 422 | ||
| 423 |
.wide_mv_cooks_outliers <- function(df, naTo0, phenotype, group, cutoff, ncp,
                                    traitCol, valueCol, labelCol, idCol) {
  # Flag multivariate outliers in wide data: project the phenotype columns onto
  # `ncp` principal components, fit a multivariate lm of the PCs on the
  # grouping factor(s), and mark rows whose Cook's distance on ANY component
  # exceeds cutoff * mean(distance) for that component.
  # Returns list("data" = labeled data, "pctRm" = percent of rows flagged).
  # traitCol, valueCol, labelCol, and idCol are unused here; they are accepted
  # for signature parity with the long-format helpers.
  if (naTo0) {
    df[, phenotype][is.na(df[, phenotype])] <- 0
  }
  phenos_df <- df[, phenotype]
  if (is.null(ncp)) {
    # default: up to 3 components, bounded by the smaller data dimension
    ncp <- min(min(dim(phenos_df)) - 1, 3)
  }
  pca <- FactoMineR::PCA(phenos_df, ncp = ncp, graph = FALSE)
  coords <- as.data.frame(pca$ind)
  coords <- coords[, grepl("coord", colnames(coords))]
  colnames(coords) <- gsub("coord.Dim.", "pc", colnames(coords))
  pca_cols <- colnames(coords)
  df <- cbind(df, coords)
  df <- df[complete.cases(df[, c(pca_cols, group)]), ]
  outlierForm <- paste(
    "cbind(", paste0("pc", seq_len(ncp), collapse = ","), ")~",
    paste(paste0("as.factor(", group, ")"), collapse = ":")
  )
  # NOTE(review): cooks.distance() on a multivariate lm returns a matrix with
  # one column per response; with ncp = 1 the response can collapse to a
  # vector and the per-column logic below would fail -- confirm ncp >= 2
  # upstream (the original carried a "this is causing a problem" comment here).
  cooksd <- cooks.distance(lm(data = df, as.formula(outlierForm)))
  # drop the temporary PC columns; setdiff() is safe even when a name is
  # absent (the original -which(... %in% ...) form is fragile on zero matches)
  df <- df[, setdiff(colnames(df), paste0("pc", seq_len(ncp)))]
  if (length(cutoff) == 1) {
    cutoff <- rep(cutoff, ncp) # recycle a scalar cutoff across components
  }
  outlierCutoffs <- cutoff * colMeans(cooksd, na.rm = TRUE)
  outlierMatrix <- do.call(cbind, lapply(seq_len(ncol(cooksd)), function(i) {
    cooks_vec <- cooksd[, i]
    # keep NA rows by assigning a value just below this component's cutoff
    cooks_vec[is.na(cooks_vec)] <- outlierCutoffs[i] - 0.1
    setNames(data.frame(cooks_vec > outlierCutoffs[i]), paste0("outlier_", i))
  }))
  # a row is an outlier if it is extreme on any component
  # (row-wise apply() replaces the original per-row unlist(lapply(...)) loop)
  outlierMatrix$outlier <- apply(outlierMatrix, 1, any)
  df <- cbind(df, outlierMatrix)
  pctRm <- 100 * (round(nrow(df[df$outlier, ]) / nrow(df), 5))
  return(list("data" = df, "pctRm" = pctRm))
}
|
| 468 | ||
| 469 | ||
| 470 |
#' *********************************************************************************************** |
|
| 471 |
#' *************** `long MV cooks distance` **************************************** |
|
| 472 |
#' *********************************************************************************************** |
|
| 473 |
#' @description |
|
| 474 |
#' Internal function for outlier detection in long MV data. |
|
| 475 |
#' |
|
| 476 |
#' @keywords internal |
|
| 477 |
#' @noRd |
|
| 478 | ||
| 479 |
.long_mv_cooks_outliers <- function(df, naTo0, phenotype, group, cutoff, ncp,
                                    traitCol, valueCol, labelCol, idCol) {
  # Pivot the long data for this phenotype into wide format so the wide
  # Cook's-distance helper can be reused, then map its outlier labels back
  # onto every long row via the group + id combination.
  long_subset <- df[df[[traitCol]] == phenotype, ]
  wide_form <- as.formula(paste0("... ~ ", traitCol, "+", labelCol))
  wide_df <- as.data.frame(data.table::dcast(
    data.table::as.data.table(long_subset),
    wide_form,
    value.var = valueCol, sep = "."
  ))
  pheno_cols <- which(grepl(phenotype, colnames(wide_df)))
  #* call .wide method on the widened data
  wide_res <- .wide_mv_cooks_outliers(wide_df, naTo0, pheno_cols, group, cutoff, ncp)
  labeled_wide <- wide_res[["data"]]
  #* label long data based on .wide output
  surviving_ids <- unique(as.character(
    interaction(labeled_wide[!labeled_wide$outlier, c(group, idCol)])
  ))
  row_ids <- as.character(interaction(df[, c(group, idCol)]))
  df$outlier <- !row_ids %in% surviving_ids
  return(list("data" = df, "pctRm" = wide_res[["pctRm"]]))
}
|
| 499 | ||
| 500 |
#' *********************************************************************************************** |
|
| 501 |
#' *************** `wide MV mahalanobis distance` **************************************** |
|
| 502 |
#' *********************************************************************************************** |
|
| 503 |
#' @description |
|
| 504 |
#' Internal function for outlier detection in wide MV data. |
|
| 505 |
#' |
|
| 506 |
#' @keywords internal |
|
| 507 |
#' @noRd |
|
| 508 | ||
| 509 | ||
| 510 |
.wide_mv_mahalanobis_outliers <- function(df, naTo0, phenotype, group, cutoff, ncp,
                                          traitCol, valueCol, labelCol, idCol) {
  # Flag multivariate outliers in wide data using Mahalanobis distance from
  # the overall centroid; within each group combination, rows whose distance
  # exceeds cutoff * mean(distance) for that group are labeled outlier = TRUE.
  # Returns list("data" = labeled data, "pctRm" = percent of rows flagged).
  # ncp, traitCol, valueCol, labelCol, and idCol are unused here; they exist
  # for signature parity with the other outlier helpers.
  if (naTo0) {
    df[, phenotype][is.na(df[, phenotype])] <- 0
  }
  phenos_df <- df[, phenotype]
  # drop near-empty histogram columns
  # NOTE(review): colSums() returns NA for columns containing NA when
  # naTo0 = FALSE, which would make this subscript fail -- assumes NAs were
  # filled or absent by this point.
  phenos_df <- phenos_df[, colSums(phenos_df) > 1]
  mahala_center <- colMeans(phenos_df, na.rm = TRUE)
  mahala_cov <- stats::cov(phenos_df)
  m <- stats::mahalanobis(phenos_df, mahala_center, mahala_cov)
  df$mahal <- m
  group_inter <- unique(as.character(interaction(df[, group])))
  # threshold within each group combination relative to that group's mean
  df_out <- do.call(rbind, lapply(group_inter, function(grp) {
    subMeta <- df[interaction(df[, group]) == grp, ]
    # direct logical comparison; the original ifelse(cond, TRUE, FALSE) was a
    # no-op wrapper around the condition
    subMeta$outlier <- subMeta$mahal > cutoff * mean(subMeta$mahal, na.rm = TRUE)
    subMeta
  }))
  pctRm <- 100 * (round(nrow(df_out[df_out$outlier, ]) / nrow(df_out), 5))
  return(list("data" = df_out, "pctRm" = pctRm))
}
|
| 535 | ||
| 536 |
#' *********************************************************************************************** |
|
| 537 |
#' *************** `long MV mahalanobis distance` **************************************** |
|
| 538 |
#' *********************************************************************************************** |
|
| 539 |
#' @description |
|
| 540 |
#' Internal function for outlier detection in long MV data. |
|
| 541 |
#' |
|
| 542 |
#' @keywords internal |
|
| 543 |
#' @noRd |
|
| 544 | ||
| 545 |
.long_mv_mahalanobis_outliers <- function(df, naTo0, phenotype, group, cutoff,
                                          ncp, traitCol, valueCol, labelCol, idCol) {
  # Pivot the long data for this phenotype into wide format so the wide
  # Mahalanobis helper can be reused, then propagate its outlier labels back
  # onto every long row via the group + id combination.
  long_subset <- df[df[[traitCol]] == phenotype, ]
  wide_form <- as.formula(paste0("... ~ ", traitCol, "+", labelCol))
  wide_df <- as.data.frame(data.table::dcast(
    data.table::as.data.table(long_subset),
    wide_form,
    value.var = valueCol, sep = "."
  ))
  pheno_cols <- which(grepl(phenotype, colnames(wide_df)))
  #* call .wide method on the widened data
  wide_res <- .wide_mv_mahalanobis_outliers(wide_df, naTo0, pheno_cols, group, cutoff)
  labeled_wide <- wide_res[["data"]]
  #* label long data based on .wide output
  surviving_ids <- unique(as.character(
    interaction(labeled_wide[!labeled_wide$outlier, c(group, idCol)])
  ))
  row_ids <- as.character(interaction(df[, c(group, idCol)]))
  df$outlier <- !row_ids %in% surviving_ids
  return(list("data" = df, "pctRm" = wide_res[["pctRm"]]))
}
| 1 |
#' Ease of use multi-value trait model helper function. |
|
| 2 |
#' |
|
| 3 |
#' This function provides a simplified interface to modeling multi-value traits using \link{growthSS}.
|
|
| 4 |
#' Output from this should be passed to \link{fitGrowth} to fit the specified model.
|
|
| 5 |
#' |
|
| 6 |
#' @param form A formula similar to \code{label | value ~ time + id/group} where label is a column
|
|
| 7 |
#' of histogram bins, value is the counts within those bins, time is an optional time variable, |
|
| 8 |
#' id identifies an individual, and group contains the treatment groups. |
|
| 9 |
#' @param sigma Distributional models passed to \link{growthSS}.
|
|
| 10 |
#' @param df Data passed to \link{growthSS}.
|
|
| 11 |
#' @param pars Parameters to vary, passed to \link{growthSS}.
|
|
| 12 |
#' @param start Starting values or priors, passed to \link{growthSS}.
|
|
| 13 |
#' @param type Backend to use, passed to \link{growthSS}.
|
|
| 14 |
#' @param tau Quantile to model, passed to \link{growthSS}.
|
|
| 15 |
#' @param hierarchy Formulae describing any hierarchical models, see \link{growthSS}.
|
|
| 16 |
#' @param spectral_index Optionally, a spectral index |
|
| 17 |
#' \href{https://plantcv.readthedocs.io/en/stable/spectral_index/}{from those calculated by PlantCV}.
|
|
| 18 |
#' If this is given then the appropriate truncation and model family (if applicable) |
|
| 19 |
#' will be included for the index you are using without you having to write it in the formula. |
|
| 20 |
#' @keywords multi-value |
|
| 21 |
#' @return A named list of plots showing prior distributions that \code{growthSS} would use,
|
|
| 22 |
#' optionally with a plot of simulated growth curves using draws from those priors. |
|
| 23 |
#' |
|
| 24 |
#' @examples |
|
| 25 |
#' set.seed(123) |
|
| 26 |
#' mv_df <- mvSim(dists = list(rnorm = list(mean = 100, sd = 30)), wide = FALSE) |
|
| 27 |
#' mv_df$group <- rep(c("a", "b"), times = 900)
|
|
| 28 |
#' mv_df <- mv_df[mv_df$value > 0, ] |
|
| 29 |
#' mv_df$label <- as.numeric(gsub("sim_", "", mv_df$variable))
|
|
| 30 |
#' |
|
| 31 |
#' ss1 <- mvSS( |
|
| 32 |
#' model = "linear", form = label | value ~ group, df = mv_df, |
|
| 33 |
#' start = list("A" = 5), type = "brms", spectral_index = "ci_rededge"
|
|
| 34 |
#' ) |
|
| 35 |
#' \donttest{
|
|
| 36 |
#' mod1 <- fitGrowth(ss1, backend = "cmdstanr", iter = 1000, chains = 1, cores = 1) |
|
| 37 |
#' growthPlot(mod1, ss1$pcvrForm, df = ss1$df) |
|
| 38 |
#' } |
|
| 39 |
#' |
|
| 40 |
#' # when the model is longitudinal the same model is possible with growthSS |
|
| 41 |
#' |
|
| 42 |
#' m1 <- mvSim( |
|
| 43 |
#' dists = list( |
|
| 44 |
#' rnorm = list(mean = 100, sd = 30), |
|
| 45 |
#' rnorm = list(mean = 110, sd = 25), |
|
| 46 |
#' rnorm = list(mean = 120, sd = 20), |
|
| 47 |
#' rnorm = list(mean = 135, sd = 15) |
|
| 48 |
#' ), |
|
| 49 |
#' wide = FALSE, n = 6 |
|
| 50 |
#' ) |
|
| 51 |
#' m1$time <- rep(1:4, times = 6 * 180) |
|
| 52 |
#' m2 <- mvSim( |
|
| 53 |
#' dists = list( |
|
| 54 |
#' rnorm = list(mean = 85, sd = 25), |
|
| 55 |
#' rnorm = list(mean = 95, sd = 20), |
|
| 56 |
#' rnorm = list(mean = 105, sd = 15), |
|
| 57 |
#' rnorm = list(mean = 110, sd = 15) |
|
| 58 |
#' ), |
|
| 59 |
#' wide = FALSE, n = 6 |
|
| 60 |
#' ) |
|
| 61 |
#' m2$time <- rep(1:4, times = 6 * 180) |
|
| 62 |
#' mv_df2 <- rbind(m1, m2) |
|
| 63 |
#' mv_df2$group <- rep(c("a", "b"), each = 4320)
|
|
| 64 |
#' mv_df2 <- mv_df2[mv_df2$value > 0, ] |
|
| 65 |
#' mv_df2$label <- as.numeric(gsub("sim_", "", mv_df2$variable))
|
|
| 66 |
#' |
|
| 67 |
#' ss_mv1 <- mvSS( |
|
| 68 |
#' model = "linear", form = label | value ~ time | group, df = mv_df2, |
|
| 69 |
#' start = list("A" = 50), type = "brms", spectral_index = "ci_rededge"
|
|
| 70 |
#' ) |
|
| 71 |
#' ss_mv2 <- growthSS( |
|
| 72 |
#' model = "skew_normal: linear", |
|
| 73 |
#' form = label | resp_weights(value) + trunc(lb = -1, ub = Inf) ~ time | group, |
|
| 74 |
#' df = mv_df2, start = list("A" = 50)
|
|
| 75 |
#' ) |
|
| 76 |
#' identical(names(ss_mv1), names(ss_mv2)) |
|
| 77 |
#' # ignoring environments and other such details these are identical except for the |
|
| 78 |
#' # function call. |
|
| 79 |
#' unlist(lapply(names(ss_mv1), function(nm) {
|
|
| 80 |
#' if (!identical(ss_mv1[[nm]], ss_mv2[[nm]], |
|
| 81 |
#' ignore.environment = TRUE, |
|
| 82 |
#' ignore.srcref = TRUE |
|
| 83 |
#' )) {
|
|
| 84 |
#' if (!identical(as.character(ss_mv1[[nm]]), as.character(ss_mv2[[nm]]))) {
|
|
| 85 |
#' nm |
|
| 86 |
#' } |
|
| 87 |
#' } |
|
| 88 |
#' })) |
|
| 89 |
#' |
|
| 90 |
#' \donttest{
|
|
| 91 |
#' m2 <- fitGrowth(ss_mv1, backend = "cmdstanr", iter = 1000, chains = 1, cores = 1) |
|
| 92 |
#' growthPlot(m2, ss_mv1$pcvrForm, df = ss_mv1$df) |
|
| 93 |
#' } |
|
| 94 |
#' |
|
| 95 |
#' @export |
|
| 96 | ||
| 97 |
mvSS <- function(model = "linear", form, sigma = NULL, df, start = NULL,
                 pars = NULL, type = "brms", tau = 0.5, hierarchy = NULL,
                 spectral_index = c(
                   "none", "ari", "ci_rededge", "cri550", "cri700",
                   "egi", "evi", "gdvi", "mari", "mcari", "mtci", "ndre",
                   "ndvi", "pri", "psnd_chlorophyll_a", "psnd_chlorophyll_b",
                   "psnd_caroteniods", "psri", "pssr_chlorophyll_a",
                   "pssr_chlorophyll_b", "pssr_caroteniods", "rgri",
                   "rvsi", "savi", "sipi", "sr", "vari", "vi_green", "wi",
                   "fvfm", "fqfm"
                 )) {
  #* `get spectral index helper function`
  #* spectral index function should just return truncation and family, then there will be a separate
  #* function that applies those changes to the form argument.
  pcvrForm <- form # keep the user's original formula for the simple-model helpers
  # Validate spectral_index and collapse it to a single choice (defaulting to
  # "none"). Previously the raw argument was pasted into get(), which errors
  # when the argument is left at its default because get() requires a single
  # character string, not the full length-31 choices vector.
  spectral_index <- match.arg(spectral_index)
  spec_helper <- get(paste0(".", spectral_index, "_mvss_hlp"))
  #* `run spectral index helper function`
  spec_helper_res <- spec_helper()
  family <- spec_helper_res$family
  trunc <- spec_helper_res$trunc
  #* `run formula cleaner`
  #* This should return the final, usable formula and model
  form_res <- .mv_ss_formula_builder(form, family, trunc, type, model, df)
  form <- form_res$formula
  weights <- form_res$weights
  model <- form_res$model
  df <- form_res$df
  has_x_var <- form_res$has_x_var
  if (has_x_var) {
    #* `if time is a variable, call growthSS with new model formula`
    out <- growthSS(
      model = model, form = form, sigma = sigma, df = df, start = start,
      pars = pars, type = type, tau = tau, hierarchy = hierarchy
    )
  } else {
    #* `if x variable is missing then call mvSS_helpers to make simple model`
    #* with no x variable there is only one reasonable model shape, so the
    #* `model` argument is essentially ignored by these backend helpers.
    form_fun <- get(paste0(".", type, "_mvss"))
    out <- form_fun(form, df, start, family, model, tau, weights, pcvrForm)
  }
  out$call <- match.call()
  return(out)
}
|
| 142 | ||
| 143 | ||
| 144 |
#' `mvSS_formula_builder`
#' add family, truncation, and weights to formula
#'
#' Rewrites the `label | value ~ ...` user formula into the form the chosen
#' backend needs: for brms the counts become `resp_weights()` (plus an
#' optional `trunc()` term) inside the formula; for other backends the counts
#' column name is split off and returned separately as `weights`.
#' @keywords internal
#' @noRd
.mv_ss_formula_builder <- function(form, family, trunc, type, model, df) {
  # as.character(formula) gives c("~", lhs, rhs)
  form_char <- as.character(form)
  has_x_var <- TRUE
  parsed <- .parsePcvrForm(form, df)
  df <- parsed$data
  # NOTE(review): relies on .parsePcvrForm()'s x/USEG/USEID semantics -- a
  # non-numeric single RHS term with no group/id is taken to mean "no time
  # variable" and the RHS is treated as grouping instead.
  if (!is.numeric(df[, parsed$x]) && !parsed$USEG && !parsed$USEID) {
    has_x_var <- FALSE # treating single element of RHS as grouping
  }
  weights <- NULL
  #* for brms backend make weights in formula
  if (type == "brms") {
    # "label | value" -> "label | resp_weights( value)" with a closing paren
    form_char[2] <- paste0(gsub("[|]", "| resp_weights(", form_char[2]), ")")
    # c(Inf, Inf) means unbounded, so only add trunc() when a bound is finite
    if (any(!is.infinite(trunc))) {
      form_char[2] <- paste0(form_char[2], " + trunc(lb = ", trunc[1], ", ub = ", trunc[2], ")")
    }
    # prepend the index's family unless the model string already carries one
    if (!grepl("[:]", model)) {
      model <- paste0(family, ": ", model)
    }
  } else {
    #* for other backends save weights variable
    # split "label | value" and keep the counts column name as `weights`
    lhs <- trimws(strsplit(form_char[2], "[|]")[[1]])
    weights <- lhs[2]
    form_char[2] <- lhs[1]
  }
  # reassemble as "lhs ~ rhs" (elements are stored as c("~", lhs, rhs))
  parsed_form <- as.formula(paste0(form_char[c(2, 1, 3)], collapse = ""))
  return(list(
    "formula" = parsed_form,
    "weights" = weights,
    "model" = model,
    "df" = df,
    "has_x_var" = has_x_var
  ))
}
|
| 182 | ||
| 183 |
#' `mvSS specified simple model helper functions` |
|
| 184 | ||
| 185 |
#' @keywords internal |
|
| 186 |
#' @noRd |
|
| 187 | ||
| 188 |
.brms_mvss <- function(form = NULL, df = NULL, start = NULL, family = NULL, model = NULL, tau = NULL,
                       weights = NULL, pcvrform = NULL) {
  # Build a growthSS-shaped output list for a non-longitudinal brms model:
  # an intercept-only non-linear formula where A varies by the RHS grouping.
  form_parts <- as.character(form)
  response_bf <- as.formula(paste0(form_parts[2], "~ A"))
  group_bf <- as.formula(paste0("A ~ 0 + ", form_parts[3]))
  brms_formula <- brms::bf(response_bf, group_bf, nl = TRUE)
  model_priors <- .makePriors(
    priors = start,
    pars = "A", df = df,
    group = "dummyGroup", # group from parse pcvr form should be dummy
    USEGROUP = FALSE,
    sigma = FALSE, family = family,
    formula = brms_formula
  )
  out <- list(
    "formula" = brms_formula,
    "prior" = model_priors,
    "initfun" = 0, # no fancy initialization here
    "df" = df,
    "family" = family,
    # NOTE(review): stores the (already rewritten) `form`, while the nls/nlrq
    # helpers store the original pcvr formula -- confirm this is intended.
    "pcvrForm" = form,
    "type" = "brms",
    # strip any "family:" prefix from the model specification
    "model" = trimws(gsub(".*:", "", model))
  )
  return(out)
}
|
| 212 | ||
| 213 |
#' @keywords internal |
|
| 214 |
#' @noRd |
|
| 215 | ||
| 216 |
.nls_mvss <- function(form = NULL, df = NULL, start = NULL, family = NULL, model = NULL, tau = NULL,
                      weights = NULL, pcvrForm = NULL) {
  # Build a growthSS-shaped output list for a non-longitudinal "nls" (lm)
  # model. `weights` is the name of the counts column in df.
  # Strip any "family:" prefix from the model specification.
  model_name <- trimws(gsub(".*:", "", model))
  # NOTE: the original assigned out[["start"]] <- NULL, which is a no-op on a
  # list, so no "start" element is included here either.
  out <- list(
    "formula" = form,
    "df" = df,
    "pcvrForm" = pcvrForm,
    "type" = "lm",
    "model" = model_name,
    "weights" = df[[weights]]
  )
  return(out)
}
|
| 228 | ||
| 229 |
#' @keywords internal |
|
| 230 |
#' @noRd |
|
| 231 | ||
| 232 |
.nlrq_mvss <- function(form = NULL, df = NULL, start = NULL, family = NULL, model = NULL, tau = NULL,
                       weights = NULL, pcvrform = NULL) {
  # Build a growthSS-shaped output list for a non-longitudinal quantile
  # regression ("rq") model. `weights` is the name of the counts column in df.
  # Strip any "family:" prefix from the model specification.
  model_name <- trimws(gsub(".*:", "", model))
  # NOTE: the original assigned out[["start"]] <- NULL, which is a no-op on a
  # list, so no "start" element is included here either.
  out <- list(
    "formula" = form,
    "df" = df,
    "pcvrForm" = pcvrform,
    "type" = "rq",
    "model" = model_name,
    "taus" = tau,
    "weights" = df[[weights]]
  )
  return(out)
}
|
| 245 | ||
| 246 |
#' `Spectral Index helpers`
#'
#' Each `.<index>_mvss_hlp` function takes no arguments and returns
#' `list("trunc" = c(lb, ub), "family" = family)` -- the response truncation
#' bounds and model family appropriate for that spectral index. `mvSS` looks
#' these up by name via `get(paste0(".", spectral_index, "_mvss_hlp"))`, so
#' every value accepted by mvSS's `spectral_index` argument must have a
#' matching helper here. A truncation of c(Inf, Inf) means "unbounded" (see
#' the `any(!is.infinite(trunc))` check in the formula builder).
#' The factory replaces ~120 lines of copy-pasted near-identical functions.
#' @keywords internal
#' @noRd
.mvss_hlp_factory <- function(truncation, family = "skew_normal") {
  # force() pins each helper's values at creation time so the closures do not
  # all share the loop/promise environment.
  force(truncation)
  force(family)
  function() {
    list("trunc" = truncation, "family" = family)
  }
}

#' @keywords internal
#' @noRd
# "none": unbounded, and leave the family as student T per the pcvr default
.none_mvss_hlp <- .mvss_hlp_factory(c(Inf, Inf), family = "student")
.ari_mvss_hlp <- .mvss_hlp_factory(c(Inf, Inf))
.ci_rededge_mvss_hlp <- .mvss_hlp_factory(c(-1, Inf))
.cri550_mvss_hlp <- .mvss_hlp_factory(c(Inf, Inf))
.cri700_mvss_hlp <- .mvss_hlp_factory(c(Inf, Inf))
.egi_mvss_hlp <- .mvss_hlp_factory(c(-1, 2))
.evi_mvss_hlp <- .mvss_hlp_factory(c(Inf, Inf))
.gdvi_mvss_hlp <- .mvss_hlp_factory(c(-2, 2))
.mari_mvss_hlp <- .mvss_hlp_factory(c(Inf, Inf))
.mcari_mvss_hlp <- .mvss_hlp_factory(c(Inf, Inf))
.mtci_mvss_hlp <- .mvss_hlp_factory(c(Inf, Inf))
.ndre_mvss_hlp <- .mvss_hlp_factory(c(-1, 1))
.ndvi_mvss_hlp <- .mvss_hlp_factory(c(-1, 1))
.pri_mvss_hlp <- .mvss_hlp_factory(c(-1, 1))
.psnd_chlorophyll_a_mvss_hlp <- .mvss_hlp_factory(c(-1, 1))
.psnd_chlorophyll_b_mvss_hlp <- .mvss_hlp_factory(c(-1, 1))
.psnd_caroteniods_mvss_hlp <- .mvss_hlp_factory(c(-1, 1))
.psri_mvss_hlp <- .mvss_hlp_factory(c(Inf, Inf))
.pssr_chlorophyll_a_mvss_hlp <- .mvss_hlp_factory(c(-1, 1))
.pssr_chlorophyll_b_mvss_hlp <- .mvss_hlp_factory(c(-1, 1))
.pssr_caroteniods_mvss_hlp <- .mvss_hlp_factory(c(-1, 1))
.rgri_mvss_hlp <- .mvss_hlp_factory(c(0, Inf))
.rvsi_mvss_hlp <- .mvss_hlp_factory(c(-1, 1))
.savi_mvss_hlp <- .mvss_hlp_factory(c(-1.2, 1.2))
.sipi_mvss_hlp <- .mvss_hlp_factory(c(Inf, Inf))
.sr_mvss_hlp <- .mvss_hlp_factory(c(0, Inf))
.vari_mvss_hlp <- .mvss_hlp_factory(c(Inf, Inf))
.vi_green_mvss_hlp <- .mvss_hlp_factory(c(-1, 1))
.wi_mvss_hlp <- .mvss_hlp_factory(c(0, Inf))
.fvfm_mvss_hlp <- .mvss_hlp_factory(c(0, 1))
.fqfm_mvss_hlp <- .mvss_hlp_factory(c(0, 1))
| 1 |
#' Network analysis of a distance matrix |
|
| 2 |
#' |
|
| 3 |
#' @description Easy igraph use with pcv.emd output |
|
| 4 |
#' |
|
| 5 |
#' |
|
| 6 |
#' @param emd A long dataframe as returned by pcv.emd. |
|
| 7 |
#' Currently this function is only made to work with dataframe output, |
|
| 8 |
#' not distance matrix output. |
|
| 9 |
#' @param meta Metadata to be carried from pcv.emd output into the network, |
|
| 10 |
#' defaults to NULL which will use all metadata. |
|
| 11 |
#' Type conversion will be attempted for these columns. |
|
| 12 |
#' @param dissim Logical, should the distCol be inverted to make a dissimilarity value? |
|
| 13 |
#' @param distCol The name of the column containing distances/dissimilarities. |
|
| 14 |
#' Defaults to "emd" for compatibility with pcv.emd
|
| 15 |
#' @param filter This can be either a numeric (0.5) in which case it is taken as |
|
| 16 |
#' a filter where only edges with values greater than or equal to that number are |
|
| 17 |
#' kept or a character string ("0.5") in which case the strongest X percentage of edges are kept.
|
|
| 18 |
#' This defaults to 0.5 which does some filtering, although that should not be considered |
|
| 19 |
#' the best behavior for every setting. If this is NULL then your network will
#' almost always be a single blob, if set too high there will be very few nodes.
|
| 21 |
#' Note that this filtering happens after converting to dissimilarity if dissim=TRUE. |
|
| 22 |
#' @param direction Direction of filtering, can be either "greater" or "lesser". |
|
| 23 |
#' @import ggplot2 |
|
| 24 |
#' @import igraph |
|
| 25 |
#' @importFrom utils type.convert |
|
| 26 |
#' @importFrom stats quantile |
|
| 27 |
#' |
|
| 28 |
#' @keywords emd multi-value-trait network |
|
| 29 |
#' @examples |
|
| 30 |
#' |
|
| 31 |
#' library(extraDistr) |
|
| 32 |
#' dists <- list( |
|
| 33 |
#' rmixnorm = list(mean = c(70, 150), sd = c(15, 5), alpha = c(0.3, 0.7)), |
|
| 34 |
#' rnorm = list(mean = 90, sd = 3) |
|
| 35 |
#' ) |
|
| 36 |
#' x <- mvSim( |
|
| 37 |
#' dists = dists, n_samples = 5, counts = 1000, |
|
| 38 |
#' min_bin = 1, max_bin = 180, wide = TRUE |
|
| 39 |
#' ) |
|
| 40 |
#' emd_df <- pcv.emd(x, |
|
| 41 |
#' cols = "sim", reorder = c("group"), mat = FALSE,
|
|
| 42 |
#' plot = FALSE, parallel = 1 |
|
| 43 |
#' ) |
|
| 44 |
#' net <- pcv.net(emd_df, meta = "group") |
|
| 45 |
#' net2 <- pcv.net(emd_df, meta = "group", filter = "0.9", direction = "lesser") |
|
| 46 |
#' |
|
| 47 |
#' @return Returns a list containing three elements: |
|
| 48 |
#' \code{nodes}: A dataframe of node data.
|
|
| 49 |
#' \code{edges}: A dataframe of edges between nodes.
|
|
| 50 |
#' \code{graph}: The network as an igraph object
|
|
| 51 |
#' |
|
| 52 |
#' @export |
|
| 53 |
#' |
|
| 54 | ||
| 55 |
pcv.net <- function(emd = NULL, meta = NULL, dissim = TRUE, distCol = "emd", filter = 0.5,
                    direction = "greater") {
  # Build an igraph network from long pairwise-distance output (see roxygen
  # above); returns list("nodes", "edges", "graph").
  if (is.data.frame(emd)) {
    #* convert to dissimilarity if metric is similarity
    if (dissim) {
      # invert distances; 1/0 -> Inf and NA are both reset to 0 (no edge weight)
      emd[[distCol]] <- 1 / emd[[distCol]]
      emd[[distCol]] <- ifelse(is.infinite(emd[[distCol]]) | is.na(emd[[distCol]]), 0, emd[[distCol]])
    }
    #* filter for edge strength
    if (!is.null(filter)) {
      # a character filter is interpreted as a quantile probability
      if (is.character(filter)) {
        filter <- quantile(emd[[distCol]], probs = as.numeric(filter))
      }
      if (match.arg(direction, c("greater", "lesser")) == "greater") {
        emd <- emd[emd[[distCol]] > filter, ]
      } else {
        emd <- emd[emd[[distCol]] < filter, ]
      }
    }
    #* turn long data into a graph and extract nodes/edges
    g <- igraph::graph_from_data_frame(emd, directed = FALSE)
  } else {
    stop("emd must be a dataframe.")
  }
  # default metadata: base names of the paired "_i"/"_j" columns from pcv.emd
  if (is.null(meta)) {
    meta <- unique(sub("_i$|_j$", "", colnames(emd)[grepl("_i$|_j$", colnames(emd))]))
  }
  gg <- as.data.frame(igraph::layout_nicely(g)) # node coordinates (V1, V2)
  both <- igraph::as_data_frame(g, "both")
  gg$index <- both$vertices$name
  eg <- both$edges
  #* link metadata to nodes
  # NOTE(review): assumes each meta name matches exactly two edge columns
  # (the _i and _j pair, in that order) -- verify when meta names overlap.
  metaIndex <- lapply(meta, function(m) which(grepl(m, colnames(eg))))
  newCols <- (ncol(gg) + 1):(ncol(gg) + length(meta))
  gg[, newCols] <- lapply(metaIndex, function(m) {
    i <- m[[1]]
    j <- m[[2]]
    f <- eg[[i]][match(gg$index, eg$from)]
    to <- eg[[j]][match(gg$index, eg$to)]
    to[which(is.na(to))] <- f[which(is.na(to))]
    to
  }) # this can be NA if there is no 'from' edge connected to a node, so check 'to' edges as well.
  colnames(gg)[newCols] <- meta
  gg[, newCols] <- type.convert(gg[, newCols], as.is = TRUE)
  #* Calculate network metrics
  gg$betweenness <- igraph::betweenness(g)
  gg$degree <- igraph::degree(g)
  # strength needs positive edge weights; shift by 0.1 so zero-weight edges count
  igraph::E(g)$weight <- eg[[distCol]] + 0.1
  gg$strength <- igraph::strength(g)
  gg$harmonic_centrality <- igraph::harmonic_centrality(g)
  gg$eigen_centrality <- igraph::eigen_centrality(g)[[1]]
  # NOTE(review): authority_score() is deprecated in igraph >= 2.0 in favor of
  # hits_scores() -- confirm the package's minimum igraph version.
  gg$authority_score <- igraph::authority_score(g)[[1]]
  gg$page_rank <- igraph::page_rank(g)[[1]]
  #* add coordinates for plotting edges
  eg$from.x <- gg$V1[match(eg$from, gg$index)]
  eg$from.y <- gg$V2[match(eg$from, gg$index)]
  eg$to.x <- gg$V1[match(eg$to, gg$index)]
  eg$to.y <- gg$V2[match(eg$to, gg$index)]
  return(list("nodes" = gg, "edges" = eg, "graph" = g))
}
| 1 |
#' Growth data simulating function |
|
| 2 |
#' |
|
| 3 |
#' @description growthSim can be used to help pick reasonable parameters for common |
|
| 4 |
#' growth models to use in prior distributions or to simulate data for example models/plots. |
|
| 5 |
#' |
|
| 6 |
#' @param model One of "logistic", "gompertz", "weibull", "frechet", "gumbel", "monomolecular", |
|
| 7 |
#' "exponential", "linear", "power law", "logarithmic", "bragg", |
|
| 8 |
#' "double logistic", or "double gompertz". |
|
| 9 |
#' Alternatively this can be a pseudo formula to generate data from a segmented growth curve by |
|
| 10 |
#' specifying "model1 + model2", see examples and \code{\link{growthSS}}.
|
|
| 11 |
#' Decay can be specified by including "decay" as part of the model such as "logistic decay" or |
|
| 12 |
#' "linear + linear decay". Count data can be specified with the "count: " prefix, |
|
| 13 |
#' similar to using "poisson: model" in \link{growthSS}.
|
|
| 14 |
#' While "gam" models are supported by \code{growthSS}
|
|
| 15 |
#' they are not simulated by this function. |
|
| 16 |
#' @param n Number of individuals to simulate over time per each group in params |
|
| 17 |
#' @param t Max time (assumed to start at 1) to simulate growth to as an integer. |
|
| 18 |
#' @param params A list of numeric parameters. A, B, C notation is used in the order that parameters |
|
| 19 |
#' appear in the formula (see examples). Number of groups is inferred from the length of these vectors |
|
| 20 |
#' of parameters. In the case of the "double" models there are also A2, B2, and C2 terms. |
|
| 21 |
#' Changepoints should be specified as "changePointX" or "fixedChangePointX" as in |
|
| 22 |
#' \code{\link{growthSS}}.
|
|
| 23 |
#' @param D If decay is being simulated then this is the starting point for decay. This defaults to 0. |
|
| 24 |
#' |
|
| 25 |
#' @return Returns a dataframe of example growth data following the input parameters. |
|
| 26 |
#' |
|
| 27 |
#' @importFrom stats rnorm setNames |
|
| 28 |
#' |
|
| 29 |
#' @examples |
|
| 30 |
#' |
|
| 31 |
#' library(ggplot2) |
|
| 32 |
#' simdf <- growthSim("logistic",
|
|
| 33 |
#' n = 20, t = 25, |
|
| 34 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 35 |
#' ) |
|
| 36 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 37 |
#' geom_line(aes(color = group)) + |
|
| 38 |
#' labs(title = "Logistic") |
|
| 39 |
#' |
|
| 40 |
#' simdf <- growthSim("gompertz",
|
|
| 41 |
#' n = 20, t = 25, |
|
| 42 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(0.2, 0.25))
|
|
| 43 |
#' ) |
|
| 44 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 45 |
#' geom_line(aes(color = group)) + |
|
| 46 |
#' labs(title = "Gompertz") |
|
| 47 |
#' |
|
| 48 |
#' simdf <- growthSim("weibull",
|
|
| 49 |
#' n = 20, t = 25, |
|
| 50 |
#' params = list("A" = c(100, 100), "B" = c(1, 0.75), "C" = c(2, 3))
|
|
| 51 |
#' ) |
|
| 52 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 53 |
#' geom_line(aes(color = group)) + |
|
| 54 |
#' labs(title = "weibull") |
|
| 55 |
#' |
|
| 56 |
#' simdf <- growthSim("frechet",
|
|
| 57 |
#' n = 20, t = 25, |
|
| 58 |
#' params = list("A" = c(100, 110), "B" = c(2, 1.5), "C" = c(5, 2))
|
|
| 59 |
#' ) |
|
| 60 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 61 |
#' geom_line(aes(color = group)) + |
|
| 62 |
#' labs(title = "frechet") |
|
| 63 |
#' |
|
| 64 |
#' simdf <- growthSim("gumbel",
|
|
| 65 |
#' n = 20, t = 25, |
|
| 66 |
#' list("A" = c(120, 140), "B" = c(6, 5), "C" = c(4, 3))
|
|
| 67 |
#' ) |
|
| 68 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 69 |
#' geom_line(aes(color = group)) + |
|
| 70 |
#' labs(title = "gumbel") |
|
| 71 |
#' |
|
| 72 |
#' simdf <- growthSim("double logistic",
|
|
| 73 |
#' n = 20, t = 70, |
|
| 74 |
#' params = list( |
|
| 75 |
#' "A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5), |
|
| 76 |
#' "A2" = c(400, 300), "B2" = c(35, 40), "C2" = c(3.25, 2.75) |
|
| 77 |
#' ) |
|
| 78 |
#' ) |
|
| 79 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 80 |
#' geom_line(aes(color = group)) + |
|
| 81 |
#' labs(title = "Double Logistic") |
|
| 82 |
#' |
|
| 83 |
#' simdf <- growthSim("double gompertz",
|
|
| 84 |
#' n = 20, t = 100, |
|
| 85 |
#' params = list( |
|
| 86 |
#' "A" = c(180, 140), "B" = c(13, 11), "C" = c(0.2, 0.2), |
|
| 87 |
#' "A2" = c(400, 300), "B2" = c(50, 50), "C2" = c(0.1, 0.1) |
|
| 88 |
#' ) |
|
| 89 |
#' ) |
|
| 90 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 91 |
#' geom_line(aes(color = group)) + |
|
| 92 |
#' labs(title = "Double Gompertz") |
|
| 93 |
#' |
|
| 94 |
#' simdf <- growthSim("monomolecular",
|
|
| 95 |
#' n = 20, t = 25, |
|
| 96 |
#' params = list("A" = c(200, 160), "B" = c(0.08, 0.1))
|
|
| 97 |
#' ) |
|
| 98 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 99 |
#' geom_line(aes(color = group)) + |
|
| 100 |
#' labs(title = "Monomolecular") |
|
| 101 |
#' |
|
| 102 |
#' simdf <- growthSim("exponential",
|
|
| 103 |
#' n = 20, t = 25, |
|
| 104 |
#' params = list("A" = c(15, 20), "B" = c(0.095, 0.095))
|
|
| 105 |
#' ) |
|
| 106 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 107 |
#' geom_line(aes(color = group)) + |
|
| 108 |
#' labs(title = "Exponential") |
|
| 109 |
#' |
|
| 110 |
#' simdf <- growthSim("linear",
|
|
| 111 |
#' n = 20, t = 25, |
|
| 112 |
#' params = list("A" = c(1.1, 0.95))
|
|
| 113 |
#' ) |
|
| 114 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 115 |
#' geom_line(aes(color = group)) + |
|
| 116 |
#' labs(title = "Linear") |
|
| 117 |
#' |
|
| 118 |
#' simdf <- growthSim("logarithmic",
|
|
| 119 |
#' n = 20, t = 25, |
|
| 120 |
#' params = list("A" = c(2, 1.7))
|
|
| 121 |
#' ) |
|
| 122 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 123 |
#' geom_line(aes(color = group)) + |
|
| 124 |
#' labs(title = "Logarithmic") |
|
| 125 |
#' |
|
| 126 |
#' simdf <- growthSim("power law",
|
|
| 127 |
#' n = 20, t = 25, |
|
| 128 |
#' params = list("A" = c(16, 11), "B" = c(0.75, 0.7))
|
|
| 129 |
#' ) |
|
| 130 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 131 |
#' geom_line(aes(color = group)) + |
|
| 132 |
#' labs(title = "Power Law") |
|
| 133 |
#' |
|
| 134 |
#' simdf <- growthSim("bragg",
|
|
| 135 |
#' n = 20, t = 100, |
|
| 136 |
#' list("A" = c(10, 15), "B" = c(0.01, 0.02), "C" = c(50, 60))
|
|
| 137 |
#' ) |
|
| 138 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 139 |
#' geom_line(aes(color = group)) + |
|
| 140 |
#' labs(title = "bragg") |
|
| 141 |
#' |
|
| 142 |
#' # simulating models from segmented growth models |
|
| 143 |
#' |
|
| 144 |
#' simdf <- growthSim( |
|
| 145 |
#' model = "linear + linear", n = 20, t = 25, |
|
| 146 |
#' params = list("linear1A" = c(16, 11), "linear2A" = c(0.75, 0.7), "changePoint1" = c(11, 14))
|
|
| 147 |
#' ) |
|
| 148 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 149 |
#' geom_line(aes(color = group)) + |
|
| 150 |
#' labs(title = "linear + linear") |
|
| 151 |
#' |
|
| 152 |
#' simdf <- growthSim( |
|
| 153 |
#' model = "linear + linear decay", n = 20, t = 25, |
|
| 154 |
#' params = list("linear1A" = c(16, 11), "linear2A" = c(3, 2), "changePoint1" = c(11, 14))
|
|
| 155 |
#' ) |
|
| 156 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 157 |
#' geom_line(aes(color = group)) + |
|
| 158 |
#' labs(title = "linear + linear decay") |
|
| 159 |
#' |
|
| 160 |
#' simdf <- growthSim( |
|
| 161 |
#' model = "linear + linear + logistic", n = 20, t = 50, |
|
| 162 |
#' params = list( |
|
| 163 |
#' "linear1A" = c(16, 11), "linear2A" = c(3, 4), # linear slopes, very intuitive |
|
| 164 |
#' "changePoint1" = c(11, 14), "changePoint2" = c(10, 12), |
|
| 165 |
#' # changepoint1 is standard, changepoint2 happens relative to changepoint 1 |
|
| 166 |
#' "logistic3A" = c(200, 210), "logistic3B" = c(20, 25), "logistic3C" = c(3, 3) |
|
| 167 |
#' ) |
|
| 168 |
#' ) |
|
| 169 |
#' # similar to changepoint2, the asymptote and inflection point are relative to the starting |
|
| 170 |
#' # point of the logistic growth component. This is different than the model output |
|
| 171 |
#' # if you were to fit a curve to this model using `growthSS`. |
|
| 172 |
#' ggplot(simdf, aes(time, y, group = interaction(group, id))) + |
|
| 173 |
#' geom_line(aes(color = group)) + |
|
| 174 |
#' labs(title = "linear + linear + logistic") |
|
| 175 |
#' |
|
| 176 |
#' @details |
|
| 177 |
#' The \code{params} argument requires some understanding of how each growth model is parameterized.
|
|
| 178 |
#' The parameterization of each model is shown below; the examples should also help.
|
| 179 |
#' \itemize{
|
|
| 180 |
#' \item \bold{Logistic}: `A / (1 + exp( (B-x)/C) )`
|
|
| 181 |
#' Where A is the asymptote, B is the inflection point, C is the growth rate. |
|
| 182 |
#' \item \bold{Gompertz}: `A * exp(-B * exp(-C*x))`
|
|
| 183 |
#' Where A is the asymptote, B is the inflection point, C is the growth rate. |
|
| 184 |
#' \item \bold{Weibull}: `A * (1-exp(-(x/C)^B))`
|
|
| 185 |
#' Where A is the asymptote, B is the weibull shape parameter, C is the weibull scale parameter. |
|
| 186 |
#' \item \bold{Frechet}: `A * exp(-((x-0)/C)^(-B))`
|
|
| 187 |
#' Where A is the asymptote, B is the frechet shape parameter, C is the frechet scale parameter. |
|
| 188 |
#' Note that the location parameter (conventionally m) is 0 in these models for simplicity but is |
|
| 189 |
#' still included in the formula. |
|
| 190 |
#' \item \bold{Gumbel}: `A * exp(-exp(-(x-B)/C))`
|
|
| 191 |
#' Where A is the asymptote, B is the inflection point (location), C is the growth rate (scale). |
|
| 192 |
#' \item \bold{Double Logistic}: `A / (1+exp((B-x)/C)) + ((A2-A) /(1+exp((B2-x)/C2)))`
|
|
| 193 |
#' Where A is the asymptote, B is the inflection point, C is the growth rate, |
|
| 194 |
#' A2 is the second asymptote, B2 is the second inflection point, and C2 is the second |
|
| 195 |
#' growth rate. |
|
| 196 |
#' \item \bold{Double Gompertz}: `A * exp(-B * exp(-C*x)) + ((A2-A) * exp(-B2 * exp(-C2*(x-B))))`
|
|
| 197 |
#' Where A is the asymptote, B is the inflection point, C is the growth rate, |
|
| 198 |
#' A2 is the second asymptote, B2 is the second inflection point, and C2 is the second |
|
| 199 |
#' growth rate. |
|
| 200 |
#' \item \bold{Monomolecular}: `A-A * exp(-B * x)`
|
|
| 201 |
#' Where A is the asymptote and B is the growth rate. |
|
| 202 |
#' \item \bold{Exponential}: `A * exp(B * x)`
|
|
| 203 |
#' Where A is the scale parameter and B is the growth rate. |
|
| 204 |
#' \item \bold{Linear}: `A * x`
|
|
| 205 |
#' Where A is the growth rate. |
|
| 206 |
#' \item \bold{Logarithmic}: `A * log(x)`
|
|
| 207 |
#' Where A is the growth rate. |
|
| 208 |
#' \item \bold{Power Law}: `A * x ^ (B)`
|
|
| 209 |
#' Where A is the scale parameter and B is the growth rate. |
|
| 210 |
#' \item \bold{Bragg}: `A * exp(-B * (x - C) ^ 2)`
|
|
| 211 |
#' This models minima and maxima as a dose-response curve where A is the max response, |
|
| 212 |
#' B is the "precision" or slope at inflection, and C is the x position of the max response. |
|
| 213 |
#' \item \bold{Lorentz}: `A / (1 + B * (x - C) ^ 2)`
|
|
| 214 |
#' This models minima and maxima as a dose-response curve where A is the max response, |
|
| 215 |
#' B is the "precision" or slope at inflection, and C is the x position of the max response. |
|
| 216 |
#' Generally Bragg is preferred to Lorentz for dose-response curves. |
|
| 217 |
#' \item \bold{Beta}: `A * (((x - D) / (C - D)) * ((E - x) / (E - C)) ^ ((E - C) / (C - D))) ^ B`
|
|
| 218 |
#' This models minima and maxima as a dose-response curve where A is the Maximum Value, |
|
| 219 |
#' B is a shape/concavity exponent similar to the sum of alpha and beta in a Beta distribution, |
|
| 220 |
#' C is the position of maximum value, D is the minimum position where distribution > 0, |
|
| 221 |
#' E is the maximum position where distribution > 0. |
|
| 222 |
#' This is a difficult model to fit but can model non-symmetric dose-response relationships which |
|
| 223 |
#' may sometimes be worth the extra effort. |
|
| 224 |
#' } |
|
| 225 |
#' Note that for these distributions parameters generally do not exist in a vacuum. |
|
| 226 |
#' Changing one will make the others look different in the resulting data. |
|
| 227 |
#' The examples are a good place to start if you are unsure what parameters to use. |
|
| 228 |
#' |
|
| 229 |
#' @export |
|
| 230 |
#' |
|
| 231 |
#' |
|
| 232 | ||
| 233 |
growthSim <- function(
    model = c(
      "logistic", "gompertz", "double logistic", "double gompertz",
      "monomolecular", "exponential", "linear", "power law", "frechet",
      "weibull", "gumbel", "logarithmic", "bragg", "lorentz", "beta"
    ),
    n = 20, t = 25, params = list(), D = 0) {
  #* use only the first element so the vector default (or a vector input)
  #* does not error below: `if (grepl(...))` requires a length-1 condition
  model <- model[1]
  #* a "count: model" prefix requests count data (rounded, non-decreasing y)
  if (grepl("count:", model)) {
    COUNT <- TRUE
    model <- trimws(gsub("count:", "", model))
  } else {
    COUNT <- FALSE
  }
  #* unnamed params are assumed to be supplied in A, B, C, ... order
  if (is.null(names(params))) {
    names(params) <- c(LETTERS[seq_along(params)])
  }
  #* uppercase single lowercase-letter names in place. The previous approach
  #* (reassigning sorted LETTERS wholesale) could scramble the name -> value
  #* mapping when names were not already in alphabetical order.
  if (any(names(params) %in% letters)) {
    lower_single <- names(params) %in% letters
    names(params)[lower_single] <- toupper(names(params)[lower_single])
  }
  params <- as.list(params)
  #* per-parameter simulation noise defaults to 10% of the parameter's mean
  noise <- lapply(params, function(i) mean(i) / 10)
  names(noise) <- names(params)
  #* "fixedChangePointX" parameters get zero noise, then are renamed to
  #* "changePointX" so downstream code treats them uniformly
  if (any(grepl("fixedChangePoint", names(noise), ignore.case = TRUE))) {
    noise[grepl("fixedChangePoint", names(noise))] <- 0
    nms <- names(noise)
    nms <- sub("fixedC", "c", nms)
    names(noise) <- nms
    nms <- names(params)
    nms <- sub("fixedC", "c", nms)
    names(params) <- nms
  }

  #* check that params are all the same length, if not then rep until they are
  if (!all(unlist(lapply(params, length)) == max(unlist(lapply(params, length))))) {
    message("params are not uniform length, values are being recycled to fit max length")
    diffLengths <- which(!unlist(lapply(params, length)) == max(unlist(lapply(params, length))))
    params[diffLengths] <- lapply(
      diffLengths,
      function(i) {
        rep(params[[i]], length.out = max(unlist(lapply(params, length))))
      }
    )
  }
  #* decide which internal function to use:
  #* a "+" in the model string means a segmented (multi-component) curve
  if (!grepl("\\+", model)) {
    out <- .singleGrowthSim(model, n, t, params, noise, D)
  } else {
    out <- .multiGrowthSim(model, n, t, params, noise, D)
  }
  #* count data: per individual, make y a rounded cumulative maximum
  if (COUNT) {
    out <- do.call(rbind, lapply(split(out, interaction(out$group, out$id)), function(sub) {
      sub$y <- round(cummax(sub$y))
      sub
    }))
    rownames(out) <- NULL
  }
  return(out)
}
|
| 291 | ||
| 292 |
#' Internal helper function to simulate growth from a series of growth models |
|
| 293 |
#' @keywords internal |
|
| 294 |
#' @noRd |
|
| 295 | ||
| 296 |
.multiGrowthSim <- function(model, n = 20, t = 25, params = list(), noise = NULL, D = 0) {
  # Split the pseudo formula "model1 + model2 [+ ...]" into component model names.
  component_models <- trimws(strsplit(model, "\\+")[[1]])

  # First segment: its parameters are named "<model>1<letter>" (e.g. "linear1A"),
  # so subset params/noise by that prefix. "decay" is stripped before matching
  # since "linear decay" shares parameter names with "linear".
  firstModel <- component_models[1]
  firstModel <- trimws(firstModel)
  firstModelFindParams <- trimws(gsub("decay", "", firstModel))
  firstParams <- params[grepl(paste0(firstModelFindParams, "1"), names(params))]
  firstChangepoints <- params[["changePoint1"]]
  firstNoise <- noise[grepl(paste0(firstModelFindParams, "1|changePoint1"), names(noise))]
  # Strip the model prefix (and "Point<digit>") so noise names become the plain
  # letters expected by .singleGrowthSim (A, B, C, ...) plus "change".
  names(firstNoise) <- sub(paste0(firstModelFindParams, "1|Point."), "", names(firstNoise))

  if (is.null(firstChangepoints)) {
    stop("Simulating segmented data requires 'changePointX' parameters as described in growthSS.")
  }

  # Simulate the first segment for each of n individuals. Each individual in
  # each group gets its own randomized changepoint (duration of segment 1).
  df1 <- do.call(rbind, lapply(1:n, function(i) {
    firstChangepointsRand <- lapply(firstChangepoints, function(fc) {
      round(rnorm(1, fc, firstNoise$change))
    })

    # One simulation per group (g), each running 1..changepoint time steps.
    n_df <- do.call(rbind, lapply(seq_along(firstChangepointsRand), function(g) {
      .singleGrowthSim(firstModel,
        n = 1, t = firstChangepointsRand[[g]],
        params = stats::setNames(
          lapply(firstParams, function(l) l[[g]]),
          c(sub(paste0(firstModel, "1"), "", names(firstParams)))
        ),
        noise = firstNoise, D
      )
    }))
    n_df$group <- rep(letters[seq_along(firstChangepointsRand)],
      times = unlist(firstChangepointsRand)
    )
    n_df$id <- paste0("id_", i)
    n_df
  }))
  dataList <- list(df1)

  # Remaining segments: simulate each, then splice it onto the end of the
  # previous segment per individual/group combination.
  for (u in 2:length(component_models)) {
    iterModel <- component_models[u]
    iterModel <- trimws(iterModel)
    iterModelFindParams <- trimws(gsub("decay", "", iterModel))
    iterParams <- params[grepl(paste0(iterModelFindParams, u), names(params))]

    nextChangepoints <- params[[paste0("changePoint", u)]]
    iterNoise <- noise[grepl(paste0(iterModelFindParams, u, "|changePoint", u), names(noise))]
    names(iterNoise) <- sub(paste0(iterModelFindParams, u, "|Point."), "", names(iterNoise))

    iter_data <- do.call(rbind, lapply(1:n, function(i) {
      # The final segment runs to t; intermediate segments run to their own
      # (randomized) changepoint, which is relative to the prior changepoint.
      if (is.null(nextChangepoints) | u == length(component_models)) {
        iterChangepointsRand <- rep(t, length(iterParams[[1]]))
      } else {
        iterChangepointsRand <- lapply(nextChangepoints, function(fc) {
          round(rnorm(1, fc, iterNoise$change))
        })
      }
      n_df <- do.call(rbind, lapply(seq_along(iterChangepointsRand), function(g) {
        if (u == length(component_models)) {
          # Last segment: simulate only the remaining time for individual i.
          # NOTE(review): this reads df1 (the FIRST segment's) max time even
          # when u > 2 — confirm this is intended for 3+ component models.
          gt <- t - max(df1[df1$id == paste0("id_", i), "time"])
        } else {
          gt <- iterChangepointsRand[[g]]
        }

        inner_df <- .singleGrowthSim(iterModel,
          n = 1, t = gt,
          params = stats::setNames(
            lapply(iterParams, function(l) l[[g]]),
            c(sub(paste0(iterModelFindParams, u), "", names(iterParams)))
          ),
          noise = iterNoise, D
        )
        inner_df$group <- letters[g]
        inner_df
      }))
      n_df$id <- paste0("id_", i)
      n_df
    }))

    prev_data <- dataList[[(u - 1)]]

    # Shift this segment so it continues where the previous one ended, in both
    # time (offset by the previous max time) and y (re-anchored so the segment
    # starts at the previous segment's final y value).
    new_data <- do.call(rbind, lapply(unique(paste0(iter_data$id, iter_data$group)), function(int) {
      prev_data_sub <- prev_data[paste0(prev_data$id, prev_data$group) == int, ]
      iter_data_sub <- iter_data[paste0(iter_data$id, iter_data$group) == int, ]
      y_end <- prev_data_sub[prev_data_sub$time == max(prev_data_sub$time), "y"]
      iter_data_sub$time <- iter_data_sub$time + max(prev_data_sub$time)
      iter_data_sub$y <- iter_data_sub$y - iter_data_sub$y[1]
      iter_data_sub$y <- y_end + iter_data_sub$y
      iter_data_sub
    }))
    dataList[[(u)]] <- new_data
  }
  out <- do.call(rbind, dataList)
  # Trim anything simulated past the requested max time.
  out <- out[out$time < t, ]
  return(out)
}
|
| 391 | ||
| 392 | ||
| 393 | ||
| 394 | ||
| 395 |
#' Internal helper function to simulate growth from a single growth model |
|
| 396 |
#' @keywords internal |
|
| 397 |
#' @noRd |
|
| 398 | ||
| 399 |
.singleGrowthSim <- function(model, n = 20, t = 25, params = list(), noise = NULL, D) {
  # Models that have a corresponding gsi_* simulation helper defined below.
  models <- c(
    "logistic", "gompertz", "double logistic", "double gompertz",
    "monomolecular", "exponential", "linear", "power law", "frechet", "weibull", "gumbel",
    "logarithmic", "bragg", "lorentz", "beta"
  )

  # "decay" in the model string flips the curve (subtracted from D below).
  if (grepl("decay", model)) {
    decay <- TRUE
    model <- trimws(gsub("decay", "", model))
  } else {
    decay <- FALSE
  }

  # Resolve the simulator by name, e.g. "power law" -> gsi_powerlaw.
  matched_model <- match.arg(model, models)
  gsi <- match.fun(paste0("gsi_", gsub(" ", "", matched_model)))

  # Wrap the simulator: decay curves start at D and decrease; growth curves
  # are returned unchanged (D is ignored in that branch).
  if (decay) {
    gsid <- function(D = 0, ...) {
      D - gsi(...)
    }
  } else {
    gsid <- function(D = 0, ...) {
      0 + gsi(...)
    }
  }

  # One group per element of the parameter vectors; n individuals per group,
  # each simulated over time 1..t with fresh per-individual parameter noise
  # (the gsi_* helpers draw the noise internally on each call).
  out <- do.call(rbind, lapply(seq_along(params[[1]]), function(i) {
    pars <- lapply(params, function(p) p[i])
    as.data.frame(rbind(do.call(rbind, lapply(1:n, function(e) {
      data.frame(
        "id" = paste0("id_", e), "group" = letters[i], "time" = 1:t,
        "y" = gsid(D = D, 1:t, pars, noise), stringsAsFactors = FALSE
      )
    }))))
  }))

  return(out)
}
|
| 438 | ||
| 439 | ||
| 440 |
#* ************************************************************ |
|
| 441 |
#* ***** `gsi functions to simulate individual plants` ***** |
|
| 442 |
#* ************************************************************ |
|
| 443 | ||
| 444 |
# Logistic curve A / (1 + exp((B - x) / C)): A is the asymptote, B the
# inflection point, C the growth rate. Each parameter is perturbed once per
# call with Gaussian noise whose sd comes from `noise`.
gsi_logistic <- function(x, pars, noise) {
  perturb <- function(nm) pars[[nm]] + rnorm(1, mean = 0, sd = noise[[nm]])
  asym <- perturb("A")
  inflection <- perturb("B")
  rate <- perturb("C")
  asym / (1 + exp((inflection - x) / rate))
}
|
| 450 |
# Gompertz curve A * exp(-B * exp(-C * x)): A is the asymptote, B the
# inflection point, C the growth rate; parameters are jittered once per call.
gsi_gompertz <- function(x, pars, noise) {
  perturb <- function(nm) pars[[nm]] + rnorm(1, mean = 0, sd = noise[[nm]])
  asym <- perturb("A")
  inflection <- perturb("B")
  rate <- perturb("C")
  asym * exp(-inflection * exp(-rate * x))
}
|
| 456 |
# Double logistic:
#   A / (1 + exp((B - x) / C)) + (A2 - A) / (1 + exp((B2 - x) / C2))
# Six parameters (A, B, C, A2, B2, C2), each drawn once per call from a
# normal centered on pars[[name]] with sd noise[[name]].
gsi_doublelogistic <- function(x, pars, noise) {
  perturb <- function(nm) pars[[nm]] + rnorm(1, mean = 0, sd = noise[[nm]])
  asym1 <- perturb("A")
  mid1 <- perturb("B")
  rate1 <- perturb("C")
  asym2 <- perturb("A2")
  mid2 <- perturb("B2")
  rate2 <- perturb("C2")
  phase1 <- asym1 / (1 + exp((mid1 - x) / rate1))
  phase2 <- (asym2 - asym1) / (1 + exp((mid2 - x) / rate2))
  phase1 + phase2
}
|
| 465 |
# Double gompertz:
#   A * exp(-B * exp(-C * x)) + (A2 - A) * exp(-B2 * exp(-C2 * (x - B)))
# Six parameters (A, B, C, A2, B2, C2), each jittered once per call.
gsi_doublegompertz <- function(x, pars, noise) {
  perturb <- function(nm) pars[[nm]] + rnorm(1, mean = 0, sd = noise[[nm]])
  asym1 <- perturb("A")
  infl1 <- perturb("B")
  rate1 <- perturb("C")
  asym2 <- perturb("A2")
  infl2 <- perturb("B2")
  rate2 <- perturb("C2")
  phase1 <- asym1 * exp(-infl1 * exp(-rate1 * x))
  phase2 <- (asym2 - asym1) * exp(-infl2 * exp(-rate2 * (x - infl1)))
  phase1 + phase2
}
|
| 474 |
# Monomolecular curve A - A * exp(-B * x): A is the asymptote, B the growth
# rate; both jittered once per call.
gsi_monomolecular <- function(x, pars, noise) {
  perturb <- function(nm) pars[[nm]] + rnorm(1, mean = 0, sd = noise[[nm]])
  asym <- perturb("A")
  rate <- perturb("B")
  asym - asym * exp(-rate * x)
}
|
| 479 |
# Exponential curve A * exp(B * x): A is the scale, B the growth rate; both
# jittered once per call.
gsi_exponential <- function(x, pars, noise) {
  perturb <- function(nm) pars[[nm]] + rnorm(1, mean = 0, sd = noise[[nm]])
  scale_par <- perturb("A")
  rate <- perturb("B")
  scale_par * exp(rate * x)
}
|
| 484 |
# Linear curve A * x: A is the growth rate (slope), jittered once per call.
gsi_linear <- function(x, pars, noise) {
  slope <- pars[["A"]] + rnorm(1, mean = 0, sd = noise[["A"]])
  slope * x
}
|
| 488 |
# Power-law curve A * x^B: A is the scale, B the growth rate; both jittered
# once per call.
gsi_powerlaw <- function(x, pars, noise) {
  perturb <- function(nm) pars[[nm]] + rnorm(1, mean = 0, sd = noise[[nm]])
  scale_par <- perturb("A")
  exponent <- perturb("B")
  scale_par * x^(exponent)
}
|
| 493 |
# Logarithmic curve A * log(x): A is the growth rate, jittered once per call.
gsi_logarithmic <- function(x, pars, noise) {
  rate <- pars[["A"]] + rnorm(1, mean = 0, sd = noise[["A"]])
  rate * log(x)
}
|
| 497 |
# Frechet curve A * exp(-((x - 0) / C)^(-B)): A is the asymptote, B the shape,
# C the scale (growth rate); location is held at 0. The jittered shape and
# scale are clamped at zero so the draw cannot go negative.
gsi_frechet <- function(x, pars, noise) {
  perturb <- function(nm) pars[[nm]] + rnorm(1, mean = 0, sd = noise[[nm]])
  asym <- perturb("A")
  shape <- max(c(0, perturb("B")))
  scale_par <- max(c(0, perturb("C")))
  # location parameter fixed at 0 (kept explicit in the formula)
  asym * exp(-((x - 0) / scale_par)^(-shape))
}
|
| 504 |
# Gumbel curve A * exp(-exp(-(x - B) / C)): A is the asymptote, B the
# location (inflection), C the scale (rate); all jittered once per call.
gsi_gumbel <- function(x, pars, noise) {
  perturb <- function(nm) pars[[nm]] + rnorm(1, mean = 0, sd = noise[[nm]])
  asym <- perturb("A")
  location <- perturb("B")
  scale_par <- perturb("C")
  asym * exp(-exp(-(x - location) / scale_par))
}
|
| 511 |
# Weibull curve A * (1 - exp(-(x / C)^B)): A is the asymptote, B the shape,
# C the scale; all jittered once per call.
gsi_weibull <- function(x, pars, noise) {
  perturb <- function(nm) pars[[nm]] + rnorm(1, mean = 0, sd = noise[[nm]])
  asym <- perturb("A")
  shape <- perturb("B")
  scale_par <- perturb("C")
  asym * (1 - exp(-(x / scale_par)^shape))
}
|
| 518 |
# Bragg dose-response curve A * exp(-B * (x - C)^2): A is the max response,
# B the precision, C the x position of the max; all jittered once per call.
gsi_bragg <- function(x, pars, noise) {
  perturb <- function(nm) pars[[nm]] + rnorm(1, mean = 0, sd = noise[[nm]])
  max_resp <- perturb("A")
  precision <- perturb("B")
  peak_x <- perturb("C")
  max_resp * exp(-precision * (x - peak_x)^2)
}
|
| 525 |
# Lorentz dose-response curve A / (1 + B * (x - C)^2): A is the max response,
# B the precision, C the x position of the max; all jittered once per call.
gsi_lorentz <- function(x, pars, noise) {
  perturb <- function(nm) pars[[nm]] + rnorm(1, mean = 0, sd = noise[[nm]])
  max_resp <- perturb("A")
  precision <- perturb("B")
  peak_x <- perturb("C")
  max_resp / (1 + precision * (x - peak_x)^2)
}
|
| 532 |
# Beta dose-response curve:
#   A * (((x - D) / (C - D)) * ((E - x) / (E - C))^((E - C) / (C - D)))^B
# A is the max value, B a shape exponent, C the position of the max, D/E the
# minimum/maximum positions where the response exceeds 0. All five parameters
# are jittered once per call.
gsi_beta <- function(x, pars, noise) {
  perturb <- function(nm) pars[[nm]] + rnorm(1, mean = 0, sd = noise[[nm]])
  max_val <- perturb("A")
  shape <- perturb("B")
  peak_x <- perturb("C")
  lower <- perturb("D")
  upper <- perturb("E")
  rising <- (x - lower) / (peak_x - lower)
  falling <- ((upper - x) / (upper - peak_x))^((upper - peak_x) / (peak_x - lower))
  max_val * (rising * falling)^shape
}
| 1 |
#' @description |
|
| 2 |
#' Internal function for calculating \eqn{\mu} and \eqn{\kappa} of a distribution represented by multi-value
|
| 3 |
#' traits. |
|
| 4 |
#' @param s1 A data.frame or matrix of multi value traits. The column names should include a number |
|
| 5 |
#' between 0.0001 and 0.9999 representing the "bin". |
|
| 6 |
#' @examples |
|
| 7 |
#' mv_gauss <- mvSim( |
|
| 8 |
#' dists = list( |
|
| 9 |
#' rnorm = list(mean = 50, sd = 10) |
|
| 10 |
#' ), |
|
| 11 |
#' n_samples = 30 |
|
| 12 |
#' ) |
|
| 13 |
#' .conj_vonmises_mv( |
|
| 14 |
#' s1 = mv_gauss[, -1], priors = list(mu = 30, kappa = 1, boundary = c(0, 180)), |
|
| 15 |
#' cred.int.level = 0.95, |
|
| 16 |
#' plot = TRUE |
|
| 17 |
#' ) |
|
| 18 |
#' @keywords internal |
|
| 19 |
#' @noRd |
|
| 20 | ||
| 21 |
.conj_vonmises_mv <- function(s1 = NULL, priors = NULL, |
|
| 22 |
plot = FALSE, support = NULL, cred.int.level = NULL, |
|
| 23 |
calculatingSupport = FALSE) {
|
|
| 24 |
#* `Turn off support for consistent rescaling between boundaries and to avoid default length of 10000` |
|
| 25 | 17x |
support <- NULL |
| 26 |
#* `Reorder columns if they are not in the numeric order` |
|
| 27 | 17x |
histColsBin <- as.numeric(sub("[a-zA-Z_.]+", "", colnames(s1)))
|
| 28 | 17x |
bins_order <- sort(histColsBin, index.return = TRUE)$ix |
| 29 | 17x |
s1 <- s1[, bins_order] |
| 30 |
#* `Turn s1 matrix into a vector` |
|
| 31 | 17x |
X1 <- rep(histColsBin[bins_order], as.numeric(round(colSums(s1)))) |
| 32 |
#* `make default prior if none provided` |
|
| 33 | 17x |
default_prior <- list( |
| 34 | 17x |
mu = 0, kappa = 0.5, |
| 35 | 17x |
boundary = c(-pi, pi), |
| 36 | 17x |
known_kappa = 1, n = 1 |
| 37 |
) |
|
| 38 | 17x |
if (is.null(priors)) {
|
| 39 | 1x |
priors <- default_prior |
| 40 |
} |
|
| 41 |
#* `if any elements are missing from prior then use defaults` |
|
| 42 | 17x |
priors <- stats::setNames(lapply(names(default_prior), function(nm) {
|
| 43 | 85x |
if (nm %in% names(priors)) {
|
| 44 | 69x |
return(priors[[nm]]) |
| 45 |
} else {
|
|
| 46 | 16x |
return(default_prior[[nm]]) |
| 47 |
} |
|
| 48 | 17x |
}), names(default_prior)) |
| 49 |
#* `rescale data to [-pi, pi] according to boundary` |
|
| 50 | 17x |
X1 <- .boundary.to.radians(x = X1, boundary = priors$boundary) |
| 51 |
#* `rescale prior on mu to [-pi, pi] according to boundary` |
|
| 52 | 17x |
mu_radians <- .boundary.to.radians(x = priors$mu, boundary = priors$boundary) |
| 53 |
#* `Raise error if the boundary is wrong and data is not on [-pi, pi]` |
|
| 54 | 17x |
if (any(abs(X1) > pi)) {
|
| 55 | 1x |
stop(paste0( |
| 56 | 1x |
"Values must be on [-pi, pi] after rescaling. ", |
| 57 | 1x |
"Does the boundary element in your prior include all your data?" |
| 58 |
)) |
|
| 59 |
} |
|
| 60 |
#* `Define dense Support` |
|
| 61 | 16x |
if (is.null(support)) {
|
| 62 | 16x |
if (calculatingSupport) {
|
| 63 | 8x |
return(priors$boundary) #* this would be [-pi, pi] if using radians, but plotting will be on |
| 64 |
#* the original scale so we can just return the boundary and use [-pi, pi] as support here |
|
| 65 |
} |
|
| 66 | 8x |
support_boundary <- seq(min(priors$boundary), max(priors$boundary), by = 0.0005) |
| 67 | 8x |
support <- seq(-pi, pi, length.out = length(support_boundary)) |
| 68 |
} |
|
| 69 | 8x |
out <- list() |
| 70 |
#* `Get weighted mean of data and prior for half tangent adjustment` |
|
| 71 | 8x |
cm <- .circular.mean(c(X1, mu_radians), w = c(rep(nrow(s1) / length(X1), length(X1)), priors$n)) |
| 72 | 8x |
unitCircleAdj <- ifelse(abs(cm) <= pi / 2, 0, pi) |
| 73 | 8x |
unitCircleAdj <- ifelse(cm > 0, 1, -1) * unitCircleAdj |
| 74 |
#* `Update prior parameters` |
|
| 75 | 8x |
a <- priors$kappa |
| 76 | 8x |
b <- mu_radians |
| 77 | 8x |
kappa_known <- priors$known_kappa |
| 78 | 8x |
kappa_prime <- kappa_known * .unbiased.kappa(X1) |
| 79 |
#* workaround for samples where kappa becomes negative if using the updating from the compendium |
|
| 80 |
#* where kappa prime is kappa_known x (A x sin B) + sum of sin data |
|
| 81 |
#* |
|
| 82 |
#* compendium and other sources I have read so far do not address this situation. |
|
| 83 |
#* seems like this would come up a lot, only difference I have seen is using [-pi, pi] vs |
|
| 84 |
#* the compendiums [0, 2pi], but I don't think that should make a difference. |
|
| 85 | ||
| 86 | 8x |
mu_prime_atan_scale <- atan(((a * sin(b)) + sum(sin(X1))) / ((a * cos(b)) + sum(cos(X1)))) |
| 87 | 8x |
mu_prime <- unitCircleAdj + mu_prime_atan_scale |
| 88 |
#* `calculate density over support` |
|
| 89 | 8x |
dens1 <- brms::dvon_mises(support, mu_prime, kappa_prime) |
| 90 | 8x |
pdf1 <- dens1 / sum(dens1) |
| 91 |
#* `calculate highest density interval` |
|
| 92 |
#* note there is no qvon_mises function, so I am using bayestestR::hdi on |
|
| 93 |
#* posterior draws and rescaled posterior draws |
|
| 94 | 8x |
draws <- brms::rvon_mises(10000, mu_prime, kappa_prime) |
| 95 | 8x |
hdi_v1 <- as.numeric(bayestestR::hdi(draws, ci = cred.int.level))[2:3] |
| 96 | 8x |
draws2 <- draws |
| 97 | 8x |
draws2[draws2 < 0] <- draws2[draws2 < 0] + 2 * pi |
| 98 | 8x |
hdi_v2 <- as.numeric(bayestestR::hdi(draws2, ci = cred.int.level))[2:3] |
| 99 | 8x |
hdis <- list(hdi_v1, hdi_v2) |
| 100 | 8x |
hdi <- hdis[[which.min(c(diff(hdi_v1), diff(hdi_v2)))]] |
| 101 | 8x |
hdi[hdi > pi] <- hdi[hdi > pi] - (2 * pi) # if the second hdi was narrower then fix the part beyond pi |
| 102 |
#* `store highest density estimate` |
|
| 103 | 8x |
hde <- mu_prime |
| 104 |
#* `Rescale HDI, HDE, and draws, from radians to boundary units` |
|
| 105 | 8x |
hdi_boundary <- .radians.to.boundary(hdi, target = priors$boundary) |
| 106 | 8x |
hde_boundary <- .radians.to.boundary(hde, target = priors$boundary) |
| 107 | 8x |
draws_boundary <- .radians.to.boundary(draws, target = priors$boundary) |
| 108 |
#* `save summary and parameters` |
|
| 109 | 8x |
out$summary <- data.frame( |
| 110 | 8x |
HDE_1 = hde_boundary, |
| 111 | 8x |
HDI_1_low = hdi_boundary[1], |
| 112 | 8x |
HDI_1_high = hdi_boundary[2] |
| 113 |
) |
|
| 114 | 8x |
out$posterior$mu <- hde_boundary # rescaled mu_prime |
| 115 | 8x |
out$posterior$kappa <- kappa_prime |
| 116 | 8x |
out$posterior$known_kappa <- priors$known_kappa |
| 117 | 8x |
out$posterior$n <- priors$n + nrow(s1) |
| 118 | 8x |
out$posterior$boundary <- priors$boundary |
| 119 |
#* `Store Posterior Draws` |
|
| 120 | 8x |
out$posteriorDraws <- draws_boundary |
| 121 | 8x |
out$pdf <- pdf1 |
| 122 |
#* `keep data for plotting` |
|
| 123 | 8x |
if (plot) {
|
| 124 | 4x |
out$plot_df <- data.frame( |
| 125 | 4x |
"range" = support_boundary, "prob" = pdf1, |
| 126 | 4x |
"sample" = rep("Sample 1", length(support_boundary))
|
| 127 |
) |
|
| 128 |
} |
|
| 129 | 8x |
return(out) |
| 130 |
} |
|
| 131 | ||
| 132 | ||
| 133 |
#' @description
#' Internal function for calculating \mu and \kappa of a von Mises distribution represented by
#' single value traits.
#' @param s1 A vector of numerics drawn from a von Mises distribution (on the scale given by
#' the prior's boundary element).
#' @examples
#' .conj_vonmises_sv(
#'   s1 = brms::rvon_mises(100, 2, 2), priors = list(mu = 0.5, kappa = 0.5),
#'   cred.int.level = 0.95,
#'   plot = FALSE
#' )
#' .conj_vonmises_sv(
#'   s1 = rnorm(20, 90, 20),
#'   priors = list(mu = 75, kappa = 0.5, boundary = c(0, 180), known_kappa = 2),
#'   cred.int.level = 0.95,
#'   plot = TRUE
#' )
#' @keywords internal
#' @noRd

.conj_vonmises_sv <- function(s1 = NULL, priors = NULL,
                              plot = FALSE, support = NULL, cred.int.level = NULL,
                              calculatingSupport = FALSE) {
  #* `to avoid default support length of 10000 which may not span boundary well`
  support <- NULL
  #* `make default prior if none provided`
  default_prior <- list(
    mu = 0, kappa = 0.5,
    boundary = c(-pi, pi),
    known_kappa = 1, n = 1
  )
  if (is.null(priors)) {
    priors <- default_prior
  }
  #* `if any elements are missing from prior then use defaults`
  priors <- stats::setNames(lapply(names(default_prior), function(nm) {
    if (nm %in% names(priors)) {
      return(priors[[nm]])
    } else {
      return(default_prior[[nm]])
    }
  }), names(default_prior))
  #* `rescale data to [-pi, pi] according to boundary`
  s1 <- .boundary.to.radians(x = s1, boundary = priors$boundary)
  #* `rescale prior on mu to [-pi, pi] according to boundary`
  mu_radians <- .boundary.to.radians(x = priors$mu, boundary = priors$boundary)
  #* `Raise error if the boundary is wrong and data is not on [-pi, pi]`
  if (any(abs(s1) > pi)) {
    stop(paste0(
      "Values must be on [-pi, pi] after rescaling. ",
      "Does the boundary element in your prior include all your data?"
    ))
  }
  #* `Define dense Support`
  if (is.null(support)) {
    if (calculatingSupport) {
      return(priors$boundary) #* this would be [-pi, pi] if using radians, but plotting will be on
      #* the original scale so we can just return the boundary and use [-pi, pi] as support here
    }
    support_boundary <- seq(min(priors$boundary), max(priors$boundary), by = 0.0005)
    support <- seq(-pi, pi, length.out = length(support_boundary))
  }
  out <- list()
  #* `Get weighted mean of data and prior for half tangent adjustment`
  #* each observation gets weight 1 and the prior mean gets weight priors$n
  cm <- .circular.mean(c(s1, mu_radians), w = c(rep(1, length(s1)), priors$n))
  unitCircleAdj <- ifelse(abs(cm) <= pi / 2, 0, pi)
  unitCircleAdj <- ifelse(cm > 0, 1, -1) * unitCircleAdj
  #* `Update prior parameters`
  a <- priors$kappa
  b <- mu_radians
  kappa_known <- priors$known_kappa
  kappa_prime <- kappa_known * .unbiased.kappa(s1)
  #* workaround for samples where kappa becomes negative if using the updating from the compendium
  #* where kappa prime is kappa_known x (A x sin B) + sum of sin data
  #* compendium and other sources I have read so far do not address this problem.
  #* seems like this would come up a lot, only difference I have seen is using [-pi, pi] vs
  #* the compendiums [0, 2pi], but I don't think that should make a difference.
  mu_prime_atan_scale <- atan(((a * sin(b)) + sum(sin(s1))) / ((a * cos(b)) + sum(cos(s1))))
  mu_prime <- unitCircleAdj + mu_prime_atan_scale
  #* `calculate density over support`
  dens1 <- brms::dvon_mises(support, mu_prime, kappa_prime)
  pdf1 <- dens1 / sum(dens1)
  #* `calculate highest density interval`
  #* note there is no qvon_mises function, so I am using bayestestR::hdi on
  #* posterior draws and rescaled posterior draws
  draws <- brms::rvon_mises(10000, mu_prime, kappa_prime)
  hdi_v1 <- as.numeric(bayestestR::hdi(draws, ci = cred.int.level))[2:3]
  draws2 <- draws
  draws2[draws2 < 0] <- draws2[draws2 < 0] + 2 * pi
  hdi_v2 <- as.numeric(bayestestR::hdi(draws2, ci = cred.int.level))[2:3]
  hdis <- list(hdi_v1, hdi_v2)
  hdi <- hdis[[which.min(c(diff(hdi_v1), diff(hdi_v2)))]]
  hdi[hdi > pi] <- hdi[hdi > pi] - (2 * pi) # if the second hdi was narrower then fix the part beyond pi
  #* `store highest density estimate`
  hde <- mu_prime
  #* `Rescale HDI, HDE, draws, and support from radians to boundary units`
  hdi_boundary <- .radians.to.boundary(hdi, target = priors$boundary)
  hde_boundary <- .radians.to.boundary(hde, target = priors$boundary)
  draws_boundary <- .radians.to.boundary(draws, target = priors$boundary)
  support_boundary <- .radians.to.boundary(support, target = priors$boundary)
  #* `save summary and parameters`
  out$summary <- data.frame(
    HDE_1 = hde_boundary,
    HDI_1_low = hdi_boundary[1],
    HDI_1_high = hdi_boundary[2]
  )
  out$posterior$mu <- hde_boundary # rescaled mu_prime
  out$posterior$kappa <- kappa_prime
  out$posterior$known_kappa <- priors$known_kappa
  out$posterior$n <- priors$n + length(s1)
  out$posterior$boundary <- priors$boundary
  #* `Store Posterior Draws`
  out$posteriorDraws <- draws_boundary
  out$pdf <- pdf1
  #* `keep data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support_boundary, "prob" = pdf1,
      "sample" = rep("Sample 1", length(support_boundary))
    )
  }
  return(out)
}
|
| 256 | ||
| 257 |
#' @description
#' Weighted circular mean, used by the von Mises conjugate updating functions.
#' @param x A numeric vector of angles in radians.
#' @param w Optional vector of weights, one per element of `x`; defaults to equal weights.
#' @examples
#' \donttest{
#' .circular.mean(brms::rvon_mises(20, -3.1, 4))
#' }
#' @keywords internal
#' @noRd

.circular.mean <- function(x, w = NULL) {
  # Equal weighting when no weights are supplied.
  if (is.null(w)) {
    w <- rep(1, length(x))
  }
  # The circular mean is the angle of the weighted resultant vector.
  weighted_sin <- sum(w * sin(x))
  weighted_cos <- sum(w * cos(x))
  return(atan2(weighted_sin, weighted_cos))
}
|
| 277 | ||
| 278 |
#' @description |
|
| 279 |
#' Function for rescaling any circular distribution to radians based on the edges. |
|
| 280 |
#' @param x A vector of numerics from a circular process |
|
| 281 |
#' @param boundary The edges of the circular process as a 2L numeric |
|
| 282 |
#' @param target the target circular space, should not be changed from radians (-pi, pi). |
|
| 283 |
#' @examples |
|
| 284 |
#' \donttest{
|
|
| 285 |
#' x <- seq(-6, 6, length.out = 20) |
|
| 286 |
#' .boundary.to.radians(x, c(6.3, 6.3)) |
|
| 287 |
#' |
|
| 288 |
#' x <- seq(0, 100, length.out = 20) |
|
| 289 |
#' .boundary.to.radians(x, c(0, 100)) |
|
| 290 |
#' } |
|
| 291 |
#' @keywords internal |
|
| 292 |
#' @noRd |
|
| 293 | ||
| 294 |
.boundary.to.radians <- function(x, boundary, target = c(-pi, pi)) {
  # Linear rescale anchored at the upper edge: boundary[2] maps to target[2].
  scale_factor <- (target[2] - target[1]) / (boundary[2] - boundary[1])
  return(scale_factor * (x - boundary[2]) + target[2])
}
|
| 298 | ||
| 299 |
#' @description |
|
| 300 |
#' Convenience function for easier reading. Same as .boundary.to.radians() with different defaults. |
|
| 301 |
#' @param x A vector of numerics from a von mises distribution |
|
| 302 |
#' @param boundary The edges of the circular process, |
|
| 303 |
#' generally these should not be changed from radians. |
|
| 304 |
#' @param target the target circular space, should be from priors$boundary. |
|
| 305 |
#' @examples |
|
| 306 |
#' \donttest{
|
|
| 307 |
#' x <- brms::rvon_mises(20, 2, 3) |
|
| 308 |
#' .radians.to.boundary(x, target = c(6.3, 6.3)) |
|
| 309 |
#' |
|
| 310 |
#' x <- brms::rvon_mises(20, 3.1, 2) |
|
| 311 |
#' .radians.to.boundary(x, target = c(0, 100)) |
|
| 312 |
#' } |
|
| 313 |
#' @keywords internal |
|
| 314 |
#' @noRd |
|
| 315 | ||
| 316 |
.radians.to.boundary <- function(x, boundary = c(-pi, pi), target = c(-100, 100)) {
  # Inverse of .boundary.to.radians: same linear map with swapped defaults,
  # anchored so boundary[2] maps to target[2].
  scale_factor <- (target[2] - target[1]) / (boundary[2] - boundary[1])
  return(scale_factor * (x - boundary[2]) + target[2])
}
|
| 320 | ||
| 321 |
#' @description |
|
| 322 |
#' Calculate difference in draws from posterior of a von-mises distribution that has been |
|
| 323 |
#' rescaled to exist on some circular space defined by the prior boundaries. |
|
| 324 |
#' @param draws1 draws from a distribution |
|
| 325 |
#' @param draws2 draws from another distribution |
|
| 326 |
#' @param boundary a boundary vector describing the circular space's edges. Should be from priors. |
|
| 327 |
#' @examples |
|
| 328 |
#' \donttest{
|
|
| 329 |
#' draws1 <- brms::rvon_mises(10000, 3.1, 4) |
|
| 330 |
#' draws2 <- brms::rvon_mises(10000, -3, 2) |
|
| 331 |
#' x <- .conj_rope_circular_diff(draws1, draws2) |
|
| 332 |
#' } |
|
| 333 |
#' @keywords internal |
|
| 334 |
#' @noRd |
|
| 335 | ||
| 336 |
.conj_rope_circular_diff <- function(draws1, draws2, boundary = c(-pi, pi)) {
  # Map both draw sets onto [-pi, pi], then shift by pi so differences can be
  # wrapped with modular arithmetic on [0, 2*pi).
  rad1 <- .boundary.to.radians(draws1, boundary = boundary, target = c(-pi, pi)) + pi
  rad2 <- .boundary.to.radians(draws2, boundary = boundary, target = c(-pi, pi)) + pi
  full_circle <- 2 * pi
  # Wrapped difference, folded back to (-pi, pi] so it is the shortest arc.
  d <- (rad1 - rad2) %% full_circle
  d <- ifelse(d <= (full_circle / 2), d, d - full_circle)
  # Rescale to boundary units and center on zero.
  return(.radians.to.boundary(d, target = boundary) - mean(boundary))
}
| 1 |
#' @description |
|
| 2 |
#' Internal function for calculating \alpha and \beta of a beta distributed p (probability) parameter |
|
| 3 |
#' in a beta-binomial conjugate distribution. |
|
| 4 |
#' |
|
| 5 |
#' @param s1 A named list of integer data containing number of successes and number of trials. |
|
| 6 |
#' @examples |
|
| 7 |
#' .conj_binomial_sv( |
|
| 8 |
#' s1 = list(successes = c(15, 14, 16, 11), trials = 20), |
|
| 9 |
#' priors = list(a = c(0.5, 0.5), b = c(0.5, 0.5)), |
|
| 10 |
#' plot = FALSE, cred.int.level = 0.95 |
|
| 11 |
#' ) |
|
| 12 |
#' @keywords internal |
|
| 13 |
#' @noRd |
|
| 14 | ||
| 15 |
.conj_binomial_sv <- function(s1 = NULL, priors = NULL,
                              plot = FALSE, support = NULL, cred.int.level = NULL,
                              calculatingSupport = FALSE) {
  #* `check stopping conditions and normalize names`
  s1 <- .conj_binomial_formatter(s1)
  #* `separate data into counts and trials`
  successes <- s1$successes
  trials <- s1$trials
  #* `recycle trial counts when a single number was given for several observations`
  if (length(trials) < length(successes)) {
    trials <- rep(trials, length(successes))
  }
  #* `make default prior if none provided`
  #* `p parameter is beta distributed`
  if (is.null(priors)) {
    priors <- list(a = 0.5, b = 0.5)
  }
  #* `Define dense Support`
  #* `p parameter is beta distributed, so support is (0, 1)`
  if (is.null(support) && calculatingSupport) {
    return(c(0.0001, 0.9999))
  }
  #* `Conjugate update: alpha gains successes, beta gains failures`
  alpha_post <- priors$a[1] + sum(successes)
  beta_post <- priors$b[1] + sum(trials) - sum(successes)
  #* `calculate normalized density over the support grid`
  dens <- dbeta(support, alpha_post, beta_post)
  scaled_pdf <- dens / sum(dens)
  #* `equal-tailed credible interval and highest density estimate`
  tail_prob <- (1 - cred.int.level) / 2
  cred_int <- qbeta(c(tail_prob, 1 - tail_prob), alpha_post, beta_post)
  hde <- .betaHDE(alpha_post, beta_post)
  #* `save summary and parameters`
  out <- list()
  out$summary <- data.frame(HDE_1 = hde, HDI_1_low = cred_int[1], HDI_1_high = cred_int[2])
  out$posterior$a <- alpha_post
  out$posterior$b <- beta_post
  #* `Make Posterior Draws`
  out$posteriorDraws <- rbeta(10000, alpha_post, beta_post)
  out$pdf <- scaled_pdf
  #* `keep data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support, "prob" = scaled_pdf,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(out)
}
|
| 68 | ||
| 69 |
#' Error condition handling helper function |
|
| 70 |
#' @param s1 The sample given to conjugate and conjugate_binomial_sv |
|
| 71 |
#' @keywords internal |
|
| 72 |
#' @noRd |
|
| 73 | ||
| 74 |
.conj_binomial_formatter <- function(s1) {
  #* `Check data for stopping conditions`
  # Negative counts can never be valid binomial data.
  if (any(unlist(s1) < 0)) {
    stop(paste0(
      "Binomial method requires successes and trials in sample data",
      " as a list of two positive integer vectors. See examples."
    ))
  }
  # Exactly two components are required: successes and trials.
  if (length(s1) != 2) {
    stop(paste0(
      "Binomial method requires successes and trials in sample data",
      " as a list of two integer vectors. See examples."
    ))
  }
  # Unnamed or differently-named input is assumed to be in the standard order.
  if (is.null(names(s1)) || any(names(s1) != c("successes", "trials"))) {
    message("Assuming sample data is in order 'successes', 'trials'.")
    names(s1) <- c("successes", "trials")
  }
  return(s1)
}
| 1 |
#' Multi Value Trait simulating function |
|
| 2 |
#' |
|
| 3 |
#' @description mvSim can be used to simulate data for example models/plots. |
|
| 4 |
#' |
|
| 5 |
#' @param n_samples Number of samples per distribution to generate. Defaults to 10, can be >1L. |
|
| 6 |
#' @param counts Number of counts per histogram, defaults to 1000. |
|
| 7 |
#' @param max_bin The number of bins to return. Note that this is also the max value that will be |
|
| 8 |
#' accepted in the distribution functions, with higher numbers being shrunk to this value. |
|
| 9 |
#' Defaults to 180. |
|
| 10 |
#' @param min_bin The minimum bin number. This can be thought of as the minimum value that will |
|
| 11 |
#' be accepted in the distribution functions, with lower numbers being raised to this value. |
|
| 12 |
#' Note that bin arguments are both ignored in the case of "rbeta" and treated as 0,1. |
|
| 13 |
#' @param dists A list of lists, with names corresponding to random deviate generating functions |
|
| 14 |
#' and arguments to the function in the list values (see examples). Note that the n argument |
|
| 15 |
#' does not need to be provided. |
|
| 16 |
#' @param wide Boolean, should data be returned in wide format (the default)? |
|
| 17 |
#' If FALSE then long data is returned. |
|
| 18 |
#' @param binwidth How wide should bins be? Defaults to 1. |
|
| 19 |
#' @keywords multi-value |
|
| 20 |
#' @return Returns a dataframe of example multi-value trait data simulated from specified distributions. |
|
| 21 |
#' |
|
| 22 |
#' @importFrom graphics hist |
|
| 23 |
#' @importFrom data.table melt as.data.table |
|
| 24 |
#' |
|
| 25 |
#' @examples |
|
| 26 |
#' |
|
| 27 |
#' library(extraDistr) # for rmixnorm |
|
| 28 |
#' library(ggplot2) |
|
| 29 |
#' n_samples = 10 |
|
| 30 |
#' counts = 1000 |
|
| 31 |
#' min_bin = 0 |
|
| 32 |
#' max_bin = 180 |
|
| 33 |
#' dists <- list( |
|
| 34 |
#' rmixnorm = list(mean = c(70, 150), sd = c(15, 5), alpha = c(0.3, 0.7)), |
|
| 35 |
#' rnorm = list(mean = 90, sd = 3) |
|
| 36 |
#' ) |
|
| 37 |
#' x <- mvSim(dists = dists, wide = FALSE) |
|
| 38 |
#' dim(x) |
|
| 39 |
#' x2 <- mvSim(dists = dists) |
|
| 40 |
#' dim(x2) |
|
| 41 |
#' |
|
| 42 |
#' ggplot(x, aes( |
|
| 43 |
#' x = as.numeric(sub("sim_", "", variable)),
|
|
| 44 |
#' y = value, group = interaction(group, id), fill = group |
|
| 45 |
#' )) + |
|
| 46 |
#' geom_col(position = "identity", alpha = 0.25) + |
|
| 47 |
#' pcv_theme() + |
|
| 48 |
#' labs(x = "bin") |
|
| 49 |
#' |
|
| 50 |
#' @export |
|
| 51 | ||
| 52 |
mvSim <- function(dists = list(rnorm = list(mean = 100, sd = 15)),
                  n_samples = 10, counts = 1000, min_bin = 1, max_bin = 180, wide = TRUE,
                  binwidth = 1) {
  # Recycle a scalar n_samples so every distribution gets its own sample count.
  if (length(n_samples) == 1) {
    n_samples <- rep(n_samples, length(dists))
  }
  # Draw raw deviates per distribution, then bin them into histogram rows.
  raw_draws <- .makeVecs(dists, counts, n_samples)
  sim <- .simFreqs(raw_draws, max_bin, min_bin, binwidth)
  # Optionally reshape to long format with one row per (sample, bin).
  if (!wide) {
    sim$id <- seq_len(nrow(sim))
    sim <- as.data.frame(
      data.table::melt(data.table::as.data.table(sim), id.vars = c("group", "id"))
    )
  }
  return(sim)
}
|
| 66 | ||
| 67 |
#' internal vector making helper function |
|
| 68 |
#' @keywords internal |
|
| 69 |
#' @noRd |
|
| 70 | ||
| 71 |
#' internal vector making helper function
#'
#' Draws `n_samples[i]` vectors of `counts` random deviates from each generator
#' named in `dists`, returning a list of lists keyed by generator name.
#' @keywords internal
#' @noRd

.makeVecs <- function(dists, counts, n_samples) {
  funs <- names(dists)
  out <- lapply(seq_along(funs), function(i) {
    f <- funs[i]
    fun <- match.fun(f)
    # Supply the number of deviates to the generator's argument list.
    dists[[i]]$n <- counts
    # seq_len (not 1:n) so n_samples[i] == 0 yields zero samples instead of two.
    lapply(seq_len(n_samples[i]), function(e) {
      do.call(fun, args = dists[[i]])
    })
  })
  names(out) <- names(dists)
  return(out)
}
|
| 84 | ||
| 85 |
#' internal histogram making helper function |
|
| 86 |
#' @keywords internal |
|
| 87 |
#' @noRd |
|
| 88 | ||
| 89 |
.simFreqs <- function(vecs, max_bin, min_bin, binwidth) {
  # One wide row of histogram counts per simulated vector, stacked per distribution.
  per_dist <- lapply(seq_along(vecs), function(i) {
    dist_name <- names(vecs)[i]
    draws <- vecs[[i]]
    rows <- lapply(draws, function(v) {
      if (dist_name == "rbeta") {
        # Beta draws: clamp to [0, 1] and bin on a fixed 99-bin grid.
        v <- pmin(pmax(v, 0), 1)
        bin_counts <- hist(v, breaks = seq(0, 1, length.out = 100), plot = FALSE)$counts
        bin_labels <- paste0("sim_", seq(0.01, 0.99, 0.01))
      } else {
        # Other draws: clamp to [min_bin, max_bin] and bin by binwidth.
        v <- pmin(pmax(v, min_bin), max_bin)
        bin_counts <- hist(v, breaks = seq(min_bin, (max_bin + binwidth), binwidth), plot = FALSE)$counts
        bin_labels <- paste0("sim_", seq(min_bin, max_bin, binwidth))
      }
      row_df <- as.data.frame(cbind(data.frame(dist_name), matrix(bin_counts, nrow = 1)))
      colnames(row_df) <- c("group", bin_labels)
      row_df
    })
    do.call(rbind, rows)
  })
  do.call(rbind, per_dist)
}
| 1 |
#' @description
#' Internal function for the normal-gamma conjugate update of a Gaussian with unknown mean and
#' precision, represented by single value traits. Returns posteriors for both Mu and Prec.
#' @param s1 A vector of numerics assumed to be drawn from a Gaussian distribution.
#' @examples
#'
#' out <- .conj_bivariate_gaussian_sv(
#'   s1 = rnorm(10, 50, 10), cred.int.level = 0.95,
#'   plot = FALSE
#' )
#' lapply(out, head)
#'
#' @keywords internal
#' @noRd

.conj_bivariate_gaussian_sv <- function(s1 = NULL, priors = NULL,
                                        plot = FALSE, support = NULL, cred.int.level = NULL,
                                        calculatingSupport = FALSE) {
  out <- list()
  #* `make default prior if none provided`
  #* conjugate prior needs alpha, beta, mu, prec (or var or sd)
  #* precision is Gamma(alpha, beta)
  #* mu is T_[2*alpha](mu, precision)
  if (is.null(priors)) {
    priors <- list(mu = 0, sd = 10, a = 1, b = 1)
  }
  #* `Extract prior components`
  alpha <- priors$a[1]
  beta <- priors$b[1]
  mu <- priors$mu[1]
  prec <- 1 / (priors$sd^2)
  #* `Calculate sufficient statistics` (sample size, mean, and sum of squares)
  n <- length(s1)
  x_bar <- mean(s1)
  ss <- sum((s1 - x_bar)^2)
  #* `Update priors with sufficient statistics`
  # NOTE(review): beta is inverted here (1 / ((1 / beta) + ...)), which treats the prior
  # beta as a scale parameter — confirm against how beta_prime is consumed below.
  alpha_prime <- alpha + (n / 2)
  beta_prime <- 1 / ((1 / beta) + (ss / 2) + ((prec * n * ((x_bar - mu)^2)) / (2 * (prec + n))))
  mu_prime <- ((prec * mu) + (n * x_bar)) / (prec + n)
  prec_prime <- prec + n
  df_prime <- 2 * alpha_prime
  prec_prime_t <- alpha_prime * prec_prime * beta_prime
  sigma_prime <- sqrt(1 / prec_prime_t)
  #* `Define bivariate support if it is missing`
  # Support grids span the extreme quantiles of the marginal posteriors for Mu and Prec.
  if (is.null(support)) {
    quantiles_mu <- extraDistr::qlst(c(0.0001, 0.9999), df_prime, mu_prime, sigma_prime)
    quantiles_prec <- stats::qgamma(c(0.0001, 0.9999), shape = alpha_prime, scale = beta_prime)
    support_mu <- seq(quantiles_mu[1], quantiles_mu[2], length.out = 10000)
    support_prec <- seq(quantiles_prec[1], quantiles_prec[2], length.out = 10000)
    if (calculatingSupport) {
      return(list("Mu" = quantiles_mu, "Prec" = quantiles_prec))
    }
  } else {
    support_mu <- support$Mu
    support_prec <- support$Prec
  }
  #* `Make Posterior Draws`
  out$posteriorDraws <- .conj_biv_rough_sampling(
    10000, alpha_prime, beta_prime,
    mu_prime, sigma_prime, df_prime
  )
  #* `posterior` densities over each marginal support, normalized to sum to 1
  dens_mu <- extraDistr::dlst(support_mu, df_prime, mu_prime, sigma_prime)
  dens_prec <- stats::dgamma(support_prec, shape = alpha_prime, scale = beta_prime)

  pdf_mu <- dens_mu / sum(dens_mu)
  pdf_prec <- dens_prec / sum(dens_prec)
  out$pdf <- list("Mu" = pdf_mu, "Prec" = pdf_prec)

  # Highest density estimates: posterior mean for Mu, gamma mode for Prec.
  hde_mu <- mu_prime
  hde_prec <- .gammaHDE(shape = alpha_prime, scale = beta_prime)
  # NOTE(review): -1 * rev(...) turns quantiles c(lower, upper) into c(-upper, -lower),
  # i.e. a negated interval. For the precision this yields negative bounds even though
  # gamma quantiles are positive — confirm this is intended (or compensated downstream).
  hdi_mu <- -1 * rev(extraDistr::qlst(
    c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
    df_prime, mu_prime, sigma_prime
  ))
  hdi_prec <- -1 * rev(stats::qgamma(
    c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
    shape = alpha_prime, scale = beta_prime
  ))

  #* `Store summary` (one row per parameter: Mu then Prec)
  out$summary <- data.frame(
    HDE_1 = c(hde_mu, hde_prec),
    HDI_1_low = c(hdi_mu[1], hdi_prec[1]),
    HDI_1_high = c(hdi_mu[2], hdi_prec[2]),
    param = c("Mu", "Prec")
  )
  out$posterior <- list(
    "mu" = mu_prime, "sd" = sigma_prime,
    "a" = alpha_prime, "b" = beta_prime
  )
  #* `save s1 data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = c(support_mu, support_prec),
      "prob" = c(pdf_mu, pdf_prec),
      "param" = rep(c("Mu", "Prec"), each = length(support_mu)),
      "sample" = rep("Sample 1", 2 * length(support_mu))
    )
  }
  return(out)
}
|
| 103 | ||
| 104 |
.conj_biv_rough_sampling <- function(n, alpha, beta, mu, sigma, df) {
  # Conditional inverse sampling was the original goal, but the location-scale t
  # makes that impractical; these marginal draws are only used for visualization,
  # not hypothesis testing.
  mu_draws <- extraDistr::rlst(n, df, mu, sigma)
  prec_draws <- stats::rgamma(n, shape = alpha, scale = beta)
  return(cbind.data.frame("Mu" = mu_draws, "Prec" = prec_draws))
}
| 1 |
#' @description
#' Internal function for calculating the gamma distributed rate parameter of an exponential
#' distribution represented by single value traits.
#' @param s1 A vector of numerics drawn from an exponential distribution.
#' @examples
#' out <- .conj_exponential_sv(
#'   s1 = rexp(10, 3), cred.int.level = 0.95,
#'   plot = FALSE
#' )
#' lapply(out, head)
#' @keywords internal
#' @noRd
.conj_exponential_sv <- function(s1 = NULL, priors = NULL,
                                 plot = FALSE, support = NULL, cred.int.level = NULL,
                                 calculatingSupport = FALSE) {
  #* `make default prior if none provided`
  if (is.null(priors)) {
    priors <- list(a = 0.5, b = 0.5)
  }
  #* `Update gamma prior with sufficient statistics`
  # Conjugate update: shape gains the sample size, rate gains the sample sum.
  post_shape <- priors$a[1] + length(s1)
  post_rate <- priors$b[1] + sum(s1)
  #* `Define support if it is missing`
  if (is.null(support) && calculatingSupport) {
    return(qgamma(c(0.0001, 0.9999), post_shape, post_rate))
  }
  out <- list()
  #* `Make Posterior Draws`
  out$posteriorDraws <- rgamma(10000, post_shape, post_rate)
  #* `posterior` density normalized over the support grid
  dens <- dgamma(support, post_shape, post_rate)
  scaled_pdf <- dens / sum(dens)
  out$pdf <- scaled_pdf
  # Highest density estimate (gamma mode) and equal-tailed credible interval.
  hde <- .gammaHDE(shape = post_shape, scale = 1 / post_rate)
  tail_prob <- (1 - cred.int.level) / 2
  cred_int <- qgamma(c(tail_prob, 1 - tail_prob), post_shape, post_rate)
  #* `Store summary`
  out$summary <- data.frame(HDE_1 = hde, HDI_1_low = cred_int[1], HDI_1_high = cred_int[2])
  out$posterior <- list(
    "a" = post_shape,
    "b" = post_rate
  )
  #* `save s1 data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support,
      "prob" = scaled_pdf,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(out)
}
| 1 |
#' Calculate pseudo water use efficiency from phenotype and watering data |
|
| 2 |
#' |
|
| 3 |
#' @description Rate based water use efficiency (WUE) is the change in biomass per unit of water |
|
| 4 |
#' metabolized. Using image based phenotypes and watering data we can calculate pseudo-WUE (pwue) over |
|
| 5 |
#' time. Here area_pixels is used as a proxy for biomass and transpiration is approximated using |
|
| 6 |
#' watering data. The equation is then |
|
| 7 |
#' \eqn{
|
|
| 8 |
#' \frac{P_{t} - P_{t-1}}{W_{t_{end-1}}-W_{t_{start}} }}{P_[t] - P_[t-1] / W_[t_(end-1)]-W_[t_start]
|
|
| 9 |
#' }, |
|
| 10 |
#' where P is the phenotype and W is the weight before watering. |
|
| 11 |
#' |
|
| 12 |
#' Absolute value based WUE is the amount of water used to sustain a plants biomass over a given period. |
|
| 13 |
#' The equation is then |
|
| 14 |
#' \eqn{\frac{P_{t}}{W_{t_{end-1}}-W_{t_{start}} }}{P_[t] / W_[t_(end-1)]-W_[t_start]}
|
|
| 15 |
#' |
|
| 16 |
#' @param df Dataframe containing wide single-value phenotype data. |
|
| 17 |
#' This should already be aggregated to one row per plant per day (angles/rotations combined). |
|
| 18 |
#' @param w Watering data as returned from bw.water. |
|
| 19 |
#' @param pheno Phenotype column name, defaults to "area_pixels" |
|
| 20 |
#' @param time Variable(s) that identify a plant on a given day. |
|
| 21 |
#' Defaults to \code{c("barcode", "DAS")}.
|
|
| 22 |
#' @param id Variable(s) that identify a plant over time. Defaults to \code{"barcode"}.
|
|
| 23 |
#' @param offset Optionally you can specify how long before imaging a watering should not be taken into |
|
| 24 |
#' account. This defaults to 0, meaning that if a plant were watered directly before being imaged then |
|
| 25 |
#' that water would be counted towards WUE between the current image and the prior one. |
|
| 26 |
#' This argument is taken to be in seconds. |
|
| 27 |
#' @param waterCol Column containing watering amounts in \code{w}. This defaults to "watering_amount".
|
|
| 28 |
#' @param method Which method to use, options are "rate" and "abs". The "rate" method considers WUE as |
|
| 29 |
#' the change in a phenotype divided by the amount of water added. The "abs" method considers WUE as |
|
| 30 |
#' the amount of water used by a plant given its absolute size. The former is for questions more |
|
| 31 |
#' related to efficiency in using water to grow while the latter is more suited to questions about |
|
| 32 |
#' how efficient a plant is at maintaining size given some amount of water. |
|
| 33 |
#' @keywords WUE |
|
| 34 |
#' @import data.table |
|
| 35 |
#' @return A data frame containing the bellwether watering data joined |
|
| 36 |
#' to phenotype data with new columns for change in the phenotype, |
|
| 37 |
#' change in the pre-watering weight, and pseudo-water use efficiency (pWUE). |
|
| 38 |
#' @examples |
|
| 39 |
#' sim_water <- data.frame( |
|
| 40 |
#' "barcode" = "exampleBarcode1", |
|
| 41 |
#' "timestamp" = as.POSIXct(c( |
|
| 42 |
#' "2023-04-13 23:28:17 UTC", |
|
| 43 |
#' "2023-04-22 05:30:42 UTC", |
|
| 44 |
#' "2023-05-04 18:55:38 UTC" |
|
| 45 |
#' )), |
|
| 46 |
#' "DAS" = c(0.000000, 8.251675, 20.810660), |
|
| 47 |
#' "water_amount" = c(98, 12, -1) |
|
| 48 |
#' ) |
|
| 49 |
#' sim_df <- data.frame( |
|
| 50 |
#' "barcode" = "exampleBarcode1", |
|
| 51 |
#' "timestamp" = as.POSIXct(c( |
|
| 52 |
#' "2023-04-13 23:28:17 UTC", |
|
| 53 |
#' "2023-04-22 05:30:42 UTC", |
|
| 54 |
#' "2023-05-04 18:55:38 UTC" |
|
| 55 |
#' )), |
|
| 56 |
#' "DAS" = c(0.000000, 8, 20), |
|
| 57 |
#' "area_pixels" = c(20, 1000, 1500) |
|
| 58 |
#' ) |
|
| 59 |
#' pwue( |
|
| 60 |
#' df = sim_df, w = sim_water, pheno = "area_pixels", |
|
| 61 |
#' time = "timestamp", id = "barcode", offset = 0, |
|
| 62 |
#' waterCol = "water_amount", method = "rate" |
|
| 63 |
#' ) |
|
| 64 |
#' |
|
| 65 |
#' pwue( |
|
| 66 |
#' df = sim_df, w = sim_water, pheno = "area_pixels", |
|
| 67 |
#' time = c("timestamp", "timestamp"), id = "barcode", offset = 0,
|
|
| 68 |
#' waterCol = "water_amount", method = "abs" |
|
| 69 |
#' ) |
|
| 70 |
#' |
|
| 71 |
#' @export |
|
| 72 | ||
| 73 |
pwue <- function(df, w, pheno = "area_pixels", time = "timestamp", id = "barcode",
                 offset = 0, waterCol = "water_amount", method = "rate") {
  #* Resolve time column name(s): a single name shared by df and w,
  #* or a pair c(df_time, w_time).
  if (length(time) == 2) {
    time1 <- time[[1]]
    time2 <- time[[2]]
  } else {
    time1 <- time2 <- time
  }
  if (!(time1 %in% colnames(df)) || !(time2 %in% colnames(w))) {
    stop(paste0(paste0(time, collapse = ", "), " must be in colnames of df and w"))
  }
  #* Order both tables by plant then time and drop non-positive waterings
  #* (e.g. sentinel values such as -1).
  w <- data.table::setorderv(data.table::as.data.table(w), cols = c(id, time2))
  w <- w[w[[waterCol]] > 0, ]
  df <- data.table::setorderv(data.table::as.data.table(df), cols = c(id, time1))
  #* Only plants present in both the watering and phenotype data are kept.
  ids <- intersect(unique(w[[id]]), unique(df[[id]]))
  matched_method <- match.arg(method, choices = c("rate", "abs"))
  #* Dispatch to the rate-based or absolute-value WUE helper.
  out <- switch(matched_method,
    "abs" = .absWUE(ids, w, df, offset, time1, time2, pheno, id, waterCol),
    "rate" = .rateWUE(ids, w, df, offset, time1, time2, pheno, id, waterCol)
  )
  return(as.data.frame(out))
}
|
| 98 | ||
| 99 |
#' Function to calculate rate based WUE |
|
| 100 |
#' @keywords internal |
|
| 101 |
#' @noRd |
|
| 102 | ||
| 103 |
#' Function to calculate rate based WUE
#'
#' For each plant id, computes the change in the phenotype between consecutive
#' imaging times divided by the water applied in that interval
#' (pWUE = pheno_diff / total_water). Returns the phenotype rows cbind-ed to
#' the per-interval WUE columns.
#' @keywords internal
#' @noRd
.rateWUE <- function(ids, w, df, offset, time1, time2, pheno, id, waterCol) {
  out <- do.call(rbind, lapply(ids, function(iter_id) { # per id...
    # Subset watering and phenotype data to this plant.
    w_i <- w[w[, get(id)] == iter_id, ]
    df_i <- df[df[, get(id)] == iter_id, ]
    #* reorder watering and pheno data
    w_i <- data.table::setorderv(w_i, cols = c(time2))
    df_i <- data.table::setorderv(df_i, cols = c(time1))
    #* get unique imaging times
    imaging_times <- unique(df_i[[time1]])
    #* set water from before first image to zero so that
    #* offset does not grab water from before imaging starts.
    w_i[[waterCol]] <- as.numeric(ifelse(w_i[[time2]] < imaging_times[1], 0, w_i[[waterCol]]))
    #* per imaging time
    wue_i <- do.call(rbind, lapply(seq_along(imaging_times), function(t_i) {
      # Interval start is NA for the first image (no prior image to diff
      # against); otherwise the previous imaging time shifted back by offset.
      start <- if (t_i == 1) {
        NA
      } else {
        imaging_times[(t_i - 1)] - offset
      }
      # Unshifted start, used to look up the prior phenotype value.
      startNonOffset <- if (t_i == 1) {
        NA
      } else {
        imaging_times[(t_i - 1)]
      }
      end <- imaging_times[t_i] - offset
      endNonOffset <- imaging_times[t_i]
      if (!is.na(start)) {
        # Water applied strictly within the (offset-adjusted) interval.
        w_i_t <- w_i[w_i[[time2]] > start & w_i[[time2]] < end, ]
        # Floor total water at 1 to avoid dividing by zero.
        total_water_i <- max(c(sum(w_i_t[[waterCol]]), 1))
        # Growth between the two imaging times, floored at 0 (shrinkage is
        # treated as zero change).
        pheno_diff <- max(c(
          as.numeric(df_i[df_i[[time1]] == endNonOffset, get(pheno)] - df_i[
            df_i[[time1]] == startNonOffset, get(pheno)
          ]), 0
        ))
      } else {
        # First image: no interval exists, so water and change are NA.
        total_water_i <- NA
        pheno_diff <- NA
      }

      row <- data.frame(
        total_water = total_water_i,
        pheno_diff = pheno_diff,
        start = startNonOffset,
        end = endNonOffset,
        timeLengthSeconds = as.numeric(end) - as.numeric(start),
        offset = offset
      )
      # Pseudo water use efficiency: change in phenotype per unit water.
      row$pWUE <- row$pheno_diff / row$total_water
      return(row)
    }))
    # Columns align because df_i rows and wue_i rows are both ordered by
    # (unique) imaging time; assumes one phenotype row per imaging time.
    iter_out <- cbind(df_i, wue_i)
    return(iter_out)
  }))
  return(out)
}
|
| 158 | ||
| 159 |
#' Function to calculate absolute value based WUE |
|
| 160 |
#' @keywords internal |
|
| 161 |
#' @noRd |
|
| 162 | ||
| 163 | ||
| 164 |
#' Function to calculate absolute value based WUE
#'
#' For each plant id, computes the absolute phenotype value at each imaging
#' time divided by the water applied since the previous imaging time
#' (pWUE = pheno_iter / total_water). Returns the phenotype rows cbind-ed to
#' the per-interval WUE columns.
#' @keywords internal
#' @noRd
.absWUE <- function(ids, w, df, offset, time1, time2, pheno, id, waterCol) {
  out <- do.call(rbind, lapply(ids, function(iter_id) { # per id...
    # Subset watering and phenotype data to this plant.
    w_i <- w[w[[id]] == iter_id, ]
    df_i <- df[df[[id]] == iter_id, ]
    #* reorder watering and pheno data
    w_i <- data.table::setorderv(w_i, cols = c(time2))
    df_i <- data.table::setorderv(df_i, cols = c(time1))
    #* get unique imaging times
    imaging_times <- unique(df_i[[time1]])
    #* set water from before first image to zero so that offset
    #* does not grab water from before imaging starts.
    #* Bug fix: the else branch previously assigned the timestamp column
    #* (w_i[[time2]]) instead of the watering amount, inflating total_water.
    #* Now matches .rateWUE by keeping w_i[[waterCol]].
    w_i[[waterCol]] <- as.numeric(ifelse(w_i[[time2]] < imaging_times[1], 0, w_i[[waterCol]]))
    #* per imaging time
    wue_i <- do.call(rbind, lapply(seq_along(imaging_times), function(t_i) {
      # Interval start is NA for the first image; otherwise the previous
      # imaging time shifted back by offset.
      start <- if (t_i == 1) {
        NA
      } else {
        imaging_times[(t_i - 1)] - offset
      }
      startNonOffset <- if (t_i == 1) {
        NA
      } else {
        imaging_times[(t_i - 1)]
      }
      end <- imaging_times[t_i] - offset
      endNonOffset <- imaging_times[t_i]

      if (!is.na(start)) {
        # Water applied strictly within the (offset-adjusted) interval.
        w_i_t <- w_i[w_i[[time2]] > start & w_i[[time2]] < end, ]
        # Floor total water at 1 to avoid dividing by zero.
        total_water_i <- max(c(sum(w_i_t[[waterCol]]), 1))
        # Absolute phenotype value at the current imaging time.
        pheno_iter <- max(df_i[df_i[[time1]] == endNonOffset, get(pheno)], na.rm = TRUE)
      } else {
        # First image: no interval exists, so water and phenotype are NA.
        total_water_i <- NA
        pheno_iter <- NA
      }

      row <- data.frame(
        total_water = total_water_i,
        pheno_iter = pheno_iter,
        start = startNonOffset,
        end = endNonOffset,
        timeLengthSeconds = as.numeric(end) - as.numeric(start),
        offset = offset
      )
      # Pseudo water use efficiency: phenotype sustained per unit water.
      row$pWUE <- row$pheno_iter / row$total_water
      return(row)
    }))
    # Columns align because df_i rows and wue_i rows are both ordered by
    # (unique) imaging time; assumes one phenotype row per imaging time.
    iter_out <- cbind(df_i, wue_i)
    return(iter_out)
  }))
  return(out)
}
| 1 |
#' Combine Draws From brms Models |
|
| 2 |
#' |
|
| 3 |
#' @description Helper function for binding draws from several \code{brms} models to make a data.frame
|
|
| 4 |
#' for use with \code{brms::hypothesis()}. This will also check that the draws are comparable using
|
|
| 5 |
#' basic model metrics. |
|
| 6 |
#' |
|
| 7 |
#' @param ... Some number of brmsfit objects and/or dataframes of draws |
|
| 8 |
#' (should generally be the same type of model fit to different data) |
|
| 9 |
#' @param message Logical, should messages about possible problems be printed? Default is TRUE. |
|
| 10 |
#' This will warn if models may not have converged, if there are different numbers of draws in |
|
| 11 |
#' the objects, or if models have different formulations. |
|
| 12 |
#' @keywords brms |
|
| 13 |
#' @importFrom methods is |
|
| 14 |
#' @examples |
|
| 15 |
#' # note that this example will fit several bayesian models and may run for several minutes. |
|
| 16 |
#' \donttest{
|
|
| 17 |
#' simdf <- growthSim("logistic",
|
|
| 18 |
#' n = 20, t = 25, |
|
| 19 |
#' params = list( |
|
| 20 |
#' "A" = c(200, 160, 220, 200, 140, 300), |
|
| 21 |
#' "B" = c(13, 11, 10, 9, 16, 12), |
|
| 22 |
#' "C" = c(3, 3.5, 3.2, 2.8, 3.3, 2.5) |
|
| 23 |
#' ) |
|
| 24 |
#' ) |
|
| 25 |
#' ss_ab <- growthSS( |
|
| 26 |
#' model = "logistic", form = y ~ time | id / group, |
|
| 27 |
#' sigma = "logistic", df = simdf[simdf$group %in% c("a", "b"), ],
|
|
| 28 |
#' start = list( |
|
| 29 |
#' "A" = 130, "B" = 12, "C" = 3, |
|
| 30 |
#' "sigmaA" = 15, "sigmaB" = 10, "sigmaC" = 3 |
|
| 31 |
#' ), type = "brms" |
|
| 32 |
#' ) |
|
| 33 |
#' |
|
| 34 |
#' ss_cd <- growthSS( |
|
| 35 |
#' model = "logistic", form = y ~ time | id / group, |
|
| 36 |
#' sigma = "logistic", df = simdf[simdf$group %in% c("c", "d"), ],
|
|
| 37 |
#' start = list( |
|
| 38 |
#' "A" = 130, "B" = 12, "C" = 3, |
|
| 39 |
#' "sigmaA" = 15, "sigmaB" = 10, "sigmaC" = 3 |
|
| 40 |
#' ), type = "brms" |
|
| 41 |
#' ) |
|
| 42 |
#' |
|
| 43 |
#' ss_ef <- growthSS( |
|
| 44 |
#' model = "logistic", form = y ~ time | id / group, |
|
| 45 |
#' sigma = "logistic", df = simdf[simdf$group %in% c("e", "f"), ],
|
|
| 46 |
#' start = list( |
|
| 47 |
#' "A" = 130, "B" = 12, "C" = 3, |
|
| 48 |
#' "sigmaA" = 15, "sigmaB" = 10, "sigmaC" = 3 |
|
| 49 |
#' ), type = "brms" |
|
| 50 |
#' ) |
|
| 51 |
#' ss_ef2 <- growthSS( |
|
| 52 |
#' model = "gompertz", form = y ~ time | id / group, |
|
| 53 |
#' sigma = "logistic", df = simdf[simdf$group %in% c("e", "f"), ],
|
|
| 54 |
#' start = list( |
|
| 55 |
#' "A" = 130, "B" = 12, "C" = 3, |
|
| 56 |
#' "sigmaA" = 15, "sigmaB" = 10, "sigmaC" = 3 |
|
| 57 |
#' ), type = "brms" |
|
| 58 |
#' ) |
|
| 59 |
#' |
|
| 60 |
#' |
|
| 61 |
#' fit_ab <- fitGrowth(ss_ab, chains = 1, cores = 1, iter = 1000) |
|
| 62 |
#' fit_ab2 <- fitGrowth(ss_ab, chains = 1, cores = 1, iter = 1200) |
|
| 63 |
#' fit_cd <- fitGrowth(ss_cd, chains = 1, cores = 1, iter = 1000) |
|
| 64 |
#' fit_ef <- fitGrowth(ss_ef, chains = 1, cores = 1, iter = 1000) |
|
| 65 |
#' fit_ef2 <- fitGrowth(ss_ef2, chains = 1, cores = 1, iter = 1000) |
|
| 66 |
#' |
|
| 67 |
#' x <- combineDraws(fit_ab, fit_cd, fit_ef) |
|
| 68 |
#' draws_ef <- as.data.frame(fit_ef) |
|
| 69 |
#' draws_ef <- draws_ef[, grepl("^b_", colnames(draws_ef))]
|
|
| 70 |
#' x2 <- combineDraws(fit_ab2, fit_cd, draws_ef) |
|
| 71 |
#' x3 <- combineDraws(fit_ab, fit_cd, fit_ef2) |
|
| 72 |
#' } |
|
| 73 |
#' |
|
| 74 |
#' @return Returns a dataframe of posterior draws. |
|
| 75 |
#' @export |
|
| 76 | ||
| 77 | ||
| 78 | ||
| 79 | ||
| 80 |
combineDraws <- function(..., message = TRUE) {
  # Collect all supplied objects; each must be a brmsfit or a data.frame.
  objects <- list(...)
  if (!all(unlist(lapply(objects, function(m) {
    methods::is(m, "brmsfit") | methods::is(m, "data.frame")
  })))) {
    stop("Only brmsfit objects and data frames are accepted")
  }

  # Recover the names the caller used for each argument (via NSE) so draw
  # columns and messages can be labeled per-object.
  obj_names <- sapply(substitute(list(...)), deparse)[-1]
  # Partition objects (and their names, in matching order) into fitted
  # models and pre-extracted draw data.frames.
  models <- objects[unlist(lapply(objects, function(m) {
    methods::is(m, "brmsfit")
  }))]
  model_names <- obj_names[unlist(lapply(objects, function(m) {
    methods::is(m, "brmsfit")
  }))]
  supplied_draw_dfs <- objects[unlist(lapply(objects, function(m) {
    methods::is(m, "data.frame")
  }))]
  df_names <- obj_names[unlist(lapply(objects, function(m) {
    methods::is(m, "data.frame")
  }))]

  # limit_size is the largest number of draws across all inputs; shorter
  # inputs are padded with NA rows up to this size so cbind can align them.
  max_fit_draws <- max(unlist(lapply(models, function(m) {
    nrow(as.data.frame(m))
  })))
  max_nrow_supplied <- max(c(0, unlist(lapply(supplied_draw_dfs, nrow))))
  limit_size <- max(c(max_fit_draws, max_nrow_supplied))

  #* `check that formulae are the same`
  if (message) {
    # Flatten each model's growth formula to "lhs~rhs" text for comparison.
    formulae <- unlist(lapply(models, function(m) {
      x <- as.character(m$formula$formula)
      form <- paste0(x[2], x[1], x[3])
      return(form)
    }))
    names(formulae) <- model_names
    if (length(unique(formulae)) > 1) {
      message("Some of these models have different growth formulas, consider if this is what you want.")
      message(paste0(paste(names(formulae), formulae, sep = ": "), collapse = ", "))
    }
  }
  #* `get and bind draws from models`
  new_draws <- do.call(cbind, lapply(seq_along(models), function(i) {
    m <- models[[i]]
    mn <- model_names[[i]]
    d <- as.data.frame(m)
    # Keep only fixed-effect draw columns ("b_" prefix) and prefix them with
    # the model's argument name to keep columns unique across models.
    draws <- d[, grepl("^b_", colnames(d))]
    colnames(draws) <- gsub("^b", mn, colnames(draws))

    if (message) {
      # Warn about potential non-convergence (Rhat > 1.05) for b_ parameters.
      rhats <- brms::rhat(m)
      rhats <- rhats[grepl("^b_", names(rhats))]
      if (any(rhats > 1.05)) {
        message(paste0(
          mn, " has Rhat values >1.05 for some model parameters.",
          "See ?barg for possible improvements.\n"
        ))
      }
    }

    # Pad with NA rows so every model contributes limit_size rows.
    if (nrow(draws) < limit_size) {
      if (message) {
        message(paste0(
          mn, " has fewer than ", limit_size, " draws and will be padded with ",
          limit_size - nrow(draws), " NAs\n"
        ))
      }
      draws[(nrow(draws) + 1):limit_size, ] <- NA
      # NOTE(review): this `draws` is not the function's return value (the
      # one after the if-block is); it is redundant but harmless.
      draws
    }
    draws
  }))
  #* `bind any other dataframes of draws`
  if (length(supplied_draw_dfs) > 0) {
    # Pad each supplied draw data.frame to limit_size rows, same as models.
    supplied_draw_dfs <- lapply(seq_along(supplied_draw_dfs), function(i) {
      df <- supplied_draw_dfs[[i]]
      dn <- df_names[[i]]
      if (nrow(df) < limit_size) {
        if (message) {
          message(paste0(
            dn, " has fewer than ", limit_size, " draws and will be padded with ",
            limit_size - nrow(df), " NAs\n"
          ))
        }
        df[(nrow(df) + 1):limit_size, ] <- NA
      }
      df
    })
    # cbind the supplied data.frames ahead of the model draws; cbind's
    # data.frame method flattens the list of data.frames into columns.
    new_draws <- do.call(cbind, args = list(supplied_draw_dfs, new_draws))
  }
  new_draws
}
| 1 |
#' Bayesian testing using conjugate priors and method of moments for single or multi value traits. |
|
| 2 |
#' |
|
| 3 |
#' @description |
|
| 4 |
#' Function to perform bayesian tests and ROPE comparisons using single or multi value traits with |
|
| 5 |
#' several distributions. |
|
| 6 |
#' |
|
| 7 |
#' @param s1 A data.frame or matrix of multi value traits or a vector of single value traits. |
|
| 8 |
#' If a multi value trait is used then column names should include a number representing the "bin". |
|
| 9 |
#' Alternatively for distributions other than "binomial" (which requires list data |
|
| 10 |
#' with "successes" and "trials" as numeric vectors in the list, see examples) |
|
| 11 |
#' this can be a formula specifying \code{outcome ~ group} where group has exactly 2
|
|
| 12 |
#' levels. If using wide MV trait data then the formula should specify column positions ~ grouping |
|
| 13 |
#' such as \code{1:180 ~ group}.
|
|
| 14 |
#' This sample is shown in red if plotted. |
|
| 15 |
#' @param s2 An optional second sample, or if s1 is a formula then this should be a dataframe. |
|
| 16 |
#' This sample is shown in blue if plotted. |
|
| 17 |
#' @param method The distribution/method to use. |
|
| 18 |
#' Currently "t", "gaussian", "beta", "binomial", "lognormal", "lognormal2", "poisson", |
|
| 19 |
#' "negbin" (negative binomial), "uniform", "pareto", "gamma", "bernoulli", "exponential", |
|
| 20 |
#' "vonmises", and "vonmises2" are supported. |
|
| 21 |
#' The count (binomial, poisson and negative binomial), bernoulli, exponential, |
|
| 22 |
#' and pareto distributions are only implemented for single value traits due to their updating |
|
| 23 |
#' and/or the nature of the input data. |
|
| 24 |
#' The "t" and "gaussian" methods both use a T distribution with "t" testing for a difference |
|
| 25 |
#' of means and "gaussian" testing for a difference in the distributions (similar to a Z test). |
|
| 26 |
#' Both Von Mises options are for use with circular data (for instance hue values when the circular |
|
| 27 |
#' quality of the data is relevant). Note that non-circular distributions can be compared to each other. |
|
| 28 |
#' This should only be done with caution. Make sure you understand the interpretation of any |
|
| 29 |
#' comparison you are doing if you specify two methods (c("gaussian", "lognormal") as an arbitrary
|
|
| 30 |
#' example). |
|
| 31 |
#' There are also 3 bivariate conjugate priors that are supported for use with single value data. |
|
| 32 |
#' Those are "bivariate_uniform", "bivariate_gaussian" and "bivariate_lognormal". |
|
| 33 |
#' @param priors Prior distributions described as a list of lists. |
|
| 34 |
#' If this is a single list then it will be duplicated for the second sample, |
|
| 35 |
#' which is generally a good idea if both |
|
| 36 |
#' samples use the same distribution (method). |
|
| 37 |
#' Elements in the inner lists should be named for the parameter they represent (see examples). |
|
| 38 |
#' These names vary by method (see details). |
|
| 39 |
#' By default this is NULL and weak priors (generally jeffrey's priors) are used. |
|
| 40 |
#' The \code{posterior} part of output can also be recycled as a new prior if Bayesian
|
|
| 41 |
#' updating is appropriate for your use. |
|
| 42 |
#' @param plot Logical, should a ggplot be made and returned. |
|
| 43 |
#' @param rope_range Optional vector specifying a region of practical equivalence. |
|
| 44 |
#' This interval is considered practically equivalent to no effect. |
|
| 45 |
#' Kruschke (2018) suggests c(-0.1, 0.1) as a broadly reasonable ROPE for standardized parameters. |
|
| 46 |
#' That range could also be rescaled by a standard deviation/magnitude for |
|
| 47 |
#' non-standardized parameters, but ultimately this should be informed by your |
|
| 48 |
#' setting and scientific question. |
|
| 49 |
#' See Kruschke (2018) for details on ROPE and other Bayesian methods to aid |
|
| 50 |
#' decision-making \doi{10.1177/2515245918771304}
|
|
| 51 |
#' and \doi{10.1037/a0029146}.
|
|
| 52 |
#' @param rope_ci The credible interval probability to use for ROPE. Defaults to 0.89. |
|
| 53 |
#' @param cred.int.level The credible interval probability to use |
|
| 54 |
#' in computing HDI for samples, defaults to 0.89. |
|
| 55 |
#' @param hypothesis Direction of a hypothesis if two samples are provided. |
|
| 56 |
#' Options are "unequal", "equal", "greater", and "lesser", |
|
| 57 |
#' read as "sample1 greater than sample2". |
|
| 58 |
#' @param support Optional support vector to include all possible values the random variable |
|
| 59 |
#' (samples) might take. This defaults to NULL in which case each method will use default |
|
| 60 |
#' behavior to attempt to calculate a dense support, but it is a good idea to supply this |
|
| 61 |
#' with some suitable vector. For example, the Beta method uses \code{seq(0.0001, 0.9999, 0.0001)}
|
|
| 62 |
#' for support. |
|
| 63 |
#' |
|
| 64 |
#' @import bayestestR |
|
| 65 |
#' @import ggplot2 |
|
| 66 |
#' @import patchwork |
|
| 67 |
#' @import extraDistr |
|
| 68 |
#' @importFrom stats var median rnorm dnorm qbeta rbeta dbeta qbeta rlnorm |
|
| 69 |
#' dlnorm qlnorm qt rgamma qgamma dgamma rpois runif |
|
| 70 |
#' |
|
| 71 |
#' @details |
|
| 72 |
#' |
|
| 73 |
#' Prior distributions default to be weakly informative and in some cases you may wish to change them. |
|
| 74 |
#' \itemize{
|
|
| 75 |
#' \item{\strong{"t" and "gaussian":} \code{priors = list( mu=c(0,0),n=c(1,1),s2=c(20,20) ) },
|
|
| 76 |
#' where mu is the mean, n is the number of prior observations, and s2 is variance} |
|
| 77 |
#' \item{\strong{"beta", "bernoulli", and "binomial":}
|
|
| 78 |
#' \code{priors = list( a=c(0.5, 0.5), b=c(0.5, 0.5) )},
|
|
| 79 |
#' where a and b are shape parameters of the beta distribution. Note that for the binomial |
|
| 80 |
#' distribution this is used as the prior for success probability P, |
|
| 81 |
#' which is assumed to be beta distributed as in a beta-binomial distribution.} |
|
| 82 |
#' \item{\strong{"lognormal": } \code{priors = list(mu = 0, sd = 5) },
|
|
| 83 |
#' where mu and sd describe the normal distribution of the mean parameter for lognormal data. |
|
| 84 |
#' Note that these values are on the log scale.} |
|
| 85 |
#' \item{\strong{"lognormal2": } \code{priors = list(a = 1, b = 1) },
|
|
| 86 |
#' where a and b are the shape and scale parameters of the gamma distribution of lognormal data's |
|
| 87 |
#' precision parameter (using the alternative mu, precision parameterization). |
|
| 88 |
#' } |
|
| 89 |
#' \item{\strong{"gamma": } \code{priors = list(shape = 0.5, scale = 0.5, known_shape = 1)},
|
|
| 90 |
#' where shape and scale are the respective parameters of the gamma distributed rate |
|
| 91 |
#' (inverse of scale) parameter of gamma distributed data.} |
|
| 92 |
#' \item{\strong{"poisson" and "exponential": } \code{priors = list(a=c(0.5,0.5),b=c(0.5,0.5))},
|
|
| 93 |
#' where a and b are shape parameters of the gamma distribution.} |
|
| 94 |
#' \item{\strong{"negbin": } \code{priors = list(r=c(10,10), a=c(0.5,0.5),b=c(0.5,0.5))},
|
|
| 95 |
#' where r is the r parameter of the negative binomial distribution |
|
| 96 |
#' (representing the number of successes required) |
|
| 97 |
#' and where a and b are shape parameters of the beta distribution. |
|
| 98 |
#' Note that the r value is not updated. |
|
| 99 |
#' The conjugate beta prior is only valid when r is fixed and known, |
|
| 100 |
#' which is a limitation for this method.} |
|
| 101 |
#' \item{\strong{"uniform": } \code{list(scale = 0.5, location = 0.5)}, where scale is the
|
|
| 102 |
#' scale parameter of the pareto distributed upper boundary and location is the location parameter |
|
| 103 |
#' of the pareto distributed upper boundary. Note that different sources will use different |
|
| 104 |
#' terminology for these parameters. These names were chosen for consistency with the |
|
| 105 |
#' \code{extraDistr} implementation of the pareto distribution. On Wikipedia the parameters are
|
|
| 106 |
#' called shape and scale, corresponding to extraDistr's scale and location respectively, which |
|
| 107 |
#' can be confusing. Note that the lower boundary of the uniform is assumed to be 0. |
|
| 108 |
#' } |
|
| 109 |
#' \item{\strong{"pareto": } \code{list(a = 1, b = 1, known_location = min(data))}, where
|
|
| 110 |
#' a and b are the shape and scale parameters of the gamma distribution of the pareto distribution's |
|
| 111 |
#' scale parameter. In this case location is assumed to be constant and known, which is less of |
|
| 112 |
#' a limitation than knowing r for the negative binomial method since location will generally be |
|
| 113 |
#' right around/just under the minimum of the sample data. Note that the pareto method is only |
|
| 114 |
#' implemented currently for single value traits since one of the statistics needed to update |
|
| 115 |
#' the gamma distribution here is the product of the data and we do not currently have a method |
|
| 116 |
#' to calculate a similar sufficient statistic from multi value traits. |
|
| 117 |
#' } |
|
| 118 |
#' \item{\strong{"vonmises": } \code{list(mu = 0, kappa = 0.5, boundary = c(-pi, pi),
|
|
| 119 |
#' known_kappa = 1, n = 1)}, where mu is the direction of the circular distribution (the mean), |
|
| 120 |
#' kappa is the precision of the mean, boundary is a vector including the two values that are the |
|
| 121 |
#' where the circular data "wraps" around the circle, known_kappa is the fixed value of precision |
|
| 122 |
#' for the total distribution, and n is the number of prior observations. This Von Mises option |
|
| 123 |
#' updates the conjugate prior for the mean direction, which is itself Von-Mises distributed. This |
|
| 124 |
#' in some ways is analogous to the T method, but assuming a fixed variance when the mean is |
|
| 125 |
#' updated. Note that due to how the rescaling works larger circular boundaries can be slow to |
|
| 126 |
#' plot. |
|
| 127 |
#' } |
|
| 128 |
#' \item{\strong{"vonmises2": } \code{priors = list(mu = 0, kappa = 0.5,
|
|
| 129 |
#' boundary = c(-pi, pi), n = 1)}, where mu and kappa are mean direction and precision of the |
|
| 130 |
#' von mises distribution, boundary is a vector including the two values that are the |
|
| 131 |
#' where the circular data "wraps" around the circle, and n is the number of prior observations. |
|
| 132 |
#' This Von-Mises implementation does not assume constant variance and instead uses MLE to estimate |
|
| 133 |
#' kappa from the data and updates the kappa prior as a weighted average of the data and the prior. |
|
| 134 |
#' The mu parameter is then updated per Von-Mises conjugacy. |
|
| 135 |
#' } |
|
| 136 |
#' \item{\strong{"bivariate_uniform": }
|
|
| 137 |
#' \code{list(location_l = 1, location_u = 2, scale = 1)}, where scale is the
|
|
| 138 |
#' shared scale parameter of the pareto distributed upper and lower boundaries and location l and u |
|
| 139 |
#' are the location parameters for the Lower (l) and Upper (u) boundaries of the uniform |
|
| 140 |
#' distribution. Note this uses the same terminology for the pareto distribution's parameters |
|
| 141 |
#' as the "uniform" method. |
|
| 142 |
#' } |
|
| 143 |
#' \item{\strong{"bivariate_gaussian" and "bivariate_lognormal": }
|
|
| 144 |
#' \code{list(mu = 0, sd = 10, a = 1, b = 1)}, where mu and sd
|
|
| 145 |
#' are the mean and standard deviation of the Normal distribution of the data's mean and a and b |
|
| 146 |
#' are the shape and scale of the gamma distribution on precision. Note that internally this uses |
|
| 147 |
#' the Mu and Precision parameterization of the normal distribution and those are the parameters |
|
| 148 |
#' shown in the plot and tested, but priors use Mu and SD for the normal distribution of the mean. |
|
| 149 |
#' } |
|
| 150 |
#' } |
|
| 151 |
#' |
|
| 152 |
#' See examples for plots of these prior distributions. |
|
| 153 |
#' |
|
| 154 |
#' @examples |
|
| 155 |
#' mv_ln <- mvSim( |
|
| 156 |
#' dists = list( |
|
| 157 |
#' rlnorm = list(meanlog = log(130), sdlog = log(1.2)), |
|
| 158 |
#' rlnorm = list(meanlog = log(100), sdlog = log(1.3)) |
|
| 159 |
#' ), |
|
| 160 |
#' n_samples = 30 |
|
| 161 |
#' ) |
|
| 162 |
#' |
|
| 163 |
#' # lognormal mv |
|
| 164 |
#' ln_mv_ex <- conjugate( |
|
| 165 |
#' s1 = mv_ln[1:30, -1], s2 = mv_ln[31:60, -1], method = "lognormal", |
|
| 166 |
#' priors = list(mu = 5, sd = 2), |
|
| 167 |
#' plot = FALSE, rope_range = c(-40, 40), rope_ci = 0.89, |
|
| 168 |
#' cred.int.level = 0.89, hypothesis = "equal", support = NULL |
|
| 169 |
#' ) |
|
| 170 |
#' |
|
| 171 |
#' # lognormal sv |
|
| 172 |
#' ln_sv_ex <- conjugate( |
|
| 173 |
#' s1 = rlnorm(100, log(130), log(1.3)), s2 = rlnorm(100, log(100), log(1.6)), |
|
| 174 |
#' method = "lognormal", |
|
| 175 |
#' priors = list(mu = 5, sd = 2), |
|
| 176 |
#' plot = FALSE, rope_range = NULL, rope_ci = 0.89, |
|
| 177 |
#' cred.int.level = 0.89, hypothesis = "equal", support = NULL |
|
| 178 |
#' ) |
|
| 179 |
#' |
|
| 180 |
#' # Z test mv example |
|
| 181 |
#' |
|
| 182 |
#' mv_gauss <- mvSim( |
|
| 183 |
#' dists = list( |
|
| 184 |
#' rnorm = list(mean = 50, sd = 10), |
|
| 185 |
#' rnorm = list(mean = 60, sd = 12) |
|
| 186 |
#' ), |
|
| 187 |
#' n_samples = 30 |
|
| 188 |
#' ) |
|
| 189 |
#' |
|
| 190 |
#' gauss_mv_ex <- conjugate( |
|
| 191 |
#' s1 = mv_gauss[1:30, -1], s2 = mv_gauss[31:60, -1], method = "gaussian", |
|
| 192 |
#' priors = list(mu = 30, n = 1, s2 = 100), |
|
| 193 |
#' plot = FALSE, rope_range = c(-25, 25), rope_ci = 0.89, |
|
| 194 |
#' cred.int.level = 0.89, hypothesis = "equal", support = NULL |
|
| 195 |
#' ) |
|
| 196 |
#' |
|
| 197 |
#' # T test sv example |
|
| 198 |
#' |
|
| 199 |
#' gaussianMeans_sv_ex <- conjugate( |
|
| 200 |
#' s1 = rnorm(10, 50, 10), s2 = rnorm(10, 60, 12), method = "t", |
|
| 201 |
#' priors = list(mu = 30, n = 1, s2 = 100), |
|
| 202 |
#' plot = FALSE, rope_range = c(-5, 8), rope_ci = 0.89, |
|
| 203 |
#' cred.int.level = 0.89, hypothesis = "equal", support = NULL |
|
| 204 |
#' ) |
|
| 205 |
#' |
|
| 206 |
#' # beta mv example |
|
| 207 |
#' |
|
| 208 |
#' set.seed(123) |
|
| 209 |
#' mv_beta <- mvSim( |
|
| 210 |
#' dists = list( |
|
| 211 |
#' rbeta = list(shape1 = 5, shape2 = 8), |
|
| 212 |
#' rbeta = list(shape1 = 10, shape2 = 10) |
|
| 213 |
#' ), |
|
| 214 |
#' n_samples = c(30, 20) |
|
| 215 |
#' ) |
|
| 216 |
#' |
|
| 217 |
#' beta_mv_ex <- conjugate( |
|
| 218 |
#' s1 = mv_beta[1:30, -1], s2 = mv_beta[31:50, -1], method = "beta", |
|
| 219 |
#' priors = list(a = 0.5, b = 0.5), |
|
| 220 |
#' plot = FALSE, rope_range = c(-0.1, 0.1), rope_ci = 0.89, |
|
| 221 |
#' cred.int.level = 0.89, hypothesis = "equal" |
|
| 222 |
#' ) |
|
| 223 |
#' |
|
| 224 |
#' # beta sv example |
|
| 225 |
#' |
|
| 226 |
#' beta_sv_ex <- conjugate( |
|
| 227 |
#' s1 = rbeta(20, 5, 5), s2 = rbeta(20, 8, 5), method = "beta", |
|
| 228 |
#' priors = list(a = 0.5, b = 0.5), |
|
| 229 |
#' plot = FALSE, rope_range = c(-0.1, 0.1), rope_ci = 0.89, |
|
| 230 |
#' cred.int.level = 0.89, hypothesis = "equal" |
|
| 231 |
#' ) |
|
| 232 |
#' |
|
| 233 |
#' # binomial sv example |
|
| 234 |
#' # note that specifying trials = 20 would also work |
|
| 235 |
#' # and the number of trials will be recycled to the length of successes |
|
| 236 |
#' |
|
| 237 |
#' binomial_sv_ex <- conjugate( |
|
| 238 |
#' s1 = list(successes = c(15, 14, 16, 11), trials = c(20, 20, 20, 20)), |
|
| 239 |
#' s2 = list(successes = c(7, 8, 10, 5), trials = c(20, 20, 20, 20)), method = "binomial", |
|
| 240 |
#' priors = list(a = 0.5, b = 0.5), |
|
| 241 |
#' plot = FALSE, rope_range = c(-0.1, 0.1), rope_ci = 0.89, |
|
| 242 |
#' cred.int.level = 0.89, hypothesis = "equal" |
|
| 243 |
#' ) |
|
| 244 |
#' |
|
| 245 |
#' # poisson sv example |
|
| 246 |
#' |
|
| 247 |
#' poisson_sv_ex <- conjugate( |
|
| 248 |
#' s1 = rpois(20, 10), s2 = rpois(20, 8), method = "poisson", |
|
| 249 |
#' priors = list(a = 0.5, b = 0.5), |
|
| 250 |
#' plot = FALSE, rope_range = c(-1, 1), rope_ci = 0.89, |
|
| 251 |
#' cred.int.level = 0.89, hypothesis = "equal" |
|
| 252 |
#' ) |
|
| 253 |
#' |
|
| 254 |
#' # negative binomial sv example |
|
| 255 |
#' # knowing r (required number of successes) is an important caveat for this method. |
|
| 256 |
#' # in the current implementation we suggest using the poisson method for data such as leaf counts |
|
| 257 |
#' |
|
| 258 |
#' negbin_sv_ex <- conjugate( |
|
| 259 |
#' s1 = rnbinom(20, 10, 0.5), s2 = rnbinom(20, 10, 0.25), method = "negbin", |
|
| 260 |
#' priors = list(r = 10, a = 0.5, b = 0.5), |
|
| 261 |
#' plot = FALSE, rope_range = c(-1, 1), rope_ci = 0.89, |
|
| 262 |
#' cred.int.level = 0.89, hypothesis = "equal" |
|
| 263 |
#' ) |
|
| 264 |
#' |
|
| 265 |
#' # von mises mv example |
|
| 266 |
#' |
|
| 267 |
#' mv_gauss <- mvSim( |
|
| 268 |
#' dists = list( |
|
| 269 |
#' rnorm = list(mean = 50, sd = 10), |
|
| 270 |
#' rnorm = list(mean = 60, sd = 12) |
|
| 271 |
#' ), |
|
| 272 |
#' n_samples = c(30, 40) |
|
| 273 |
#' ) |
|
| 274 |
#' vm1_ex <- conjugate( |
|
| 275 |
#' s1 = mv_gauss[1:30, -1], |
|
| 276 |
#' s2 = mv_gauss[31:70, -1], |
|
| 277 |
#' method = "vonmises", |
|
| 278 |
#' priors = list(mu = 45, kappa = 1, boundary = c(0, 180), known_kappa = 1, n = 1), |
|
| 279 |
#' plot = FALSE, rope_range = c(-1, 1), rope_ci = 0.89, |
|
| 280 |
#' cred.int.level = 0.89, hypothesis = "equal" |
|
| 281 |
#' ) |
|
| 282 |
#' |
|
| 283 |
#' # von mises 2 sv example |
|
| 284 |
#' vm2_ex <- conjugate( |
|
| 285 |
#' s1 = brms::rvon_mises(10, 2, 2), |
|
| 286 |
#' s2 = brms::rvon_mises(15, 3, 3), |
|
| 287 |
#' method = "vonmises2", |
|
| 288 |
#' priors = list(mu = 0, kappa = 0.5, boundary = c(-pi, pi), n = 1), |
|
| 289 |
#' cred.int.level = 0.95, |
|
| 290 |
#' plot = FALSE |
|
| 291 |
#' ) |
|
| 292 |
#' |
|
| 293 |
#' @return |
|
| 294 |
#' |
|
| 295 |
#' A list with named elements: |
|
| 296 |
#' \itemize{
|
|
| 297 |
#' \item{\strong{summary}: A data frame containing HDI/HDE values for each sample and
|
|
| 298 |
#' the ROPE as well as posterior probability of the hypothesis.} |
|
| 299 |
#' \item{\strong{posterior}: A list of updated parameters in the same format as the prior
|
|
| 300 |
#' for the given method. If desired this does allow for Bayesian updating.} |
|
| 301 |
#' \item{\strong{plot_df}: A data frame of probabilities along the support for each sample.
|
|
| 302 |
#' This is used for making the ggplot.} |
|
| 303 |
#' \item{\strong{rope_df}: A data frame of draws from the ROPE posterior.}
|
|
| 304 |
#' \item{\strong{plot}: A ggplot showing the distribution of samples and optionally the
|
|
| 305 |
#' distribution of differences/ROPE} |
|
| 306 |
#' } |
|
| 307 |
#' |
|
| 308 |
#' @keywords bayesian conjugate priors ROPE |
|
| 309 |
#' @export |
|
| 310 | ||
| 311 |
conjugate <- function(s1 = NULL, s2 = NULL,
                      method = c(
                        "t", "gaussian", "beta", "binomial",
                        "lognormal", "lognormal2", "poisson", "negbin", "vonmises", "vonmises2",
                        "uniform", "pareto", "gamma", "bernoulli", "exponential", "bivariate_uniform",
                        "bivariate_gaussian", "bivariate_lognormal"
                      ),
                      priors = NULL, plot = FALSE, rope_range = NULL,
                      rope_ci = 0.89, cred.int.level = 0.89, hypothesis = "equal",
                      support = NULL) {
  #* `Handle formula option in s1`
  samples <- .formatSamples(s1, s2)
  s1 <- samples$s1
  s2 <- samples$s2
  #* `check length of method, replicate if there is a second sample`
  if (length(method) == 1 && !is.null(s2)) {
    method <- rep(method, 2)
  }
  # A single (non-nested) prior list is recycled for both samples.
  if (!is.null(priors) && !methods::is(priors[[1]], "list")) {
    priors <- list(priors, priors)
  }
  samplesList <- list(s1)
  if (!is.null(s2)) {
    samplesList[[2]] <- s2
  }
  if (is.null(support)) {
    support <- .getSupport(samplesList, method, priors) # calculate shared support
  }
  #* `Update posterior for each sample via the matched .conj_*_{sv,mv} helper`
  sample_results <- lapply(seq_along(samplesList), function(i) {
    sample <- samplesList[[i]]
    prior <- priors[[i]]
    #* `Check sample class`
    # Fix: scalar condition, so use short-circuit || (was elementwise |).
    if (is.matrix(sample) || is.data.frame(sample)) {
      vec_suffix <- "mv"
      sample <- .mvSampleFormatting(sample)
    } else if (is.vector(sample)) {
      vec_suffix <- "sv"
    }
    matched_arg <- match.arg(method[i], choices = c(
      "t", "gaussian", "beta", "binomial",
      "lognormal", "lognormal2", "poisson", "negbin",
      "vonmises", "vonmises2",
      "uniform", "pareto", "gamma", "bernoulli", "exponential",
      "bivariate_uniform", "bivariate_gaussian", "bivariate_lognormal"
    ))
    # Dispatch by name, e.g. ".conj_t_sv" or ".conj_beta_mv".
    matched_fun <- get(paste0(".conj_", matched_arg, "_", vec_suffix))
    res <- matched_fun(sample, prior, plot, support, cred.int.level)
    return(res)
  })
  #* `combine results into an object to return`
  out <- list()
  out$summary <- do.call(cbind, lapply(seq_along(sample_results), function(i) {
    s <- sample_results[[i]]$summary
    if (i == 2) {
      # Drop the duplicated param column and relabel sample-2 columns (1 -> 2).
      s <- s[, !grepl("param", colnames(s))]
      colnames(s) <- gsub("1", "2", colnames(s))
    }
    s
  }))
  if (!is.null(s2)) {
    # Posterior probability of the hypothesis from the two discretized pdfs.
    postProbRes <- .pdf.handling(sample_results[[1]]$pdf, sample_results[[2]]$pdf, hypothesis)
    out$summary <- cbind(
      out$summary,
      data.frame("hyp" = hypothesis, "post.prob" = as.numeric(postProbRes$post.prob))
    )
    dirSymbol <- postProbRes$direction
  } else {
    dirSymbol <- NULL
  }
  #* `parse output and do ROPE`
  if (!is.null(rope_range)) {
    rope_res <- .conj_rope(sample_results, rope_range, rope_ci, plot, method)
    out$summary <- cbind(out$summary, rope_res$summary)
  } else {
    rope_res <- NULL
  }
  out$posterior <- lapply(sample_results, function(s) s$posterior)
  #* `Make plot`
  if (plot) {
    out$plot <- .conj_plot(sample_results, rope_res,
      res = out,
      rope_range, rope_ci, dirSymbol, support, method
    )
  }
  return(out)
}
|
| 400 | ||
| 401 |
#' *********************************************************************************************** |
|
| 402 |
#' *************** `Formula Handling Helper function` *********************************** |
|
| 403 |
#' *********************************************************************************************** |
|
| 404 | ||
| 405 |
#' @keywords internal |
|
| 406 |
#' @noRd |
|
| 407 | ||
| 408 |
.formatSamples <- function(s1 = NULL, s2 = NULL) {
  # Non-formula input: pass both samples through unchanged.
  if (!methods::is(s1, "formula")) {
    return(list("s1" = s1, "s2" = s2))
  }
  # Formula interface: s1 is outcome ~ group and s2 holds the data.
  if (!is.data.frame(s2)) {
    stop("If s1 is a formula then s2 must be a data.frame")
  }
  form_chr <- as.character(s1) # c("~", lhs, rhs)
  lhs <- form_chr[2]
  rhs <- form_chr[3]
  if (lhs %in% colnames(s2)) {
    # Single-value trait: split the outcome column by the grouping column.
    samples <- split(s2[[lhs]], s2[[rhs]])
  } else {
    # Multi-value trait: lhs is an expression selecting columns per group.
    samples <- lapply(split(s2, s2[[rhs]]), function(grp) {
      grp[, eval(str2lang(lhs))]
    })
  }
  names(samples) <- c("s1", "s2")
  return(samples)
}
|
| 428 | ||
| 429 |
#' *********************************************************************************************** |
|
| 430 |
#' *************** `MV Sample Formatting Helper function` *********************************** |
|
| 431 |
#' *********************************************************************************************** |
|
| 432 | ||
| 433 |
#' @keywords internal |
|
| 434 |
#' @noRd |
|
| 435 | ||
| 436 |
.mvSampleFormatting <- function(sample) {
  #* `Standardize sample class and names`
  # Coerce matrices to data frames, keeping any existing column names
  # (as.data.frame would otherwise invent V1..Vn names).
  if (is.matrix(sample)) {
    kept_names <- colnames(sample)
    sample <- as.data.frame(sample)
    colnames(sample) <- kept_names
  }
  # Fall back to b1..bN bin names when columns are unnamed.
  if (is.null(colnames(sample))) {
    bins <- seq_along(sample)
    colnames(sample) <- paste0("b", bins)
    warning(paste0("Assuming unnamed columns represent bins from ", min(bins), " to ", max(bins)))
  }
  return(sample)
}
|
| 450 | ||
| 451 |
#' *********************************************************************************************** |
|
| 452 |
#' *************** `Support Calculating function` *********************************** |
|
| 453 |
#' *********************************************************************************************** |
|
| 454 |
#' |
|
| 455 |
#' |
|
| 456 |
#' @keywords internal |
|
| 457 |
#' @noRd |
|
| 458 | ||
| 459 | ||
| 460 |
.getSupport <- function(samplesList, method, priors) {
  #* `Check for bivariate`
  # grepl on a length-1 string is already scalar; the original wrapped it in
  # any() and an if/else that assigned TRUE/FALSE.
  biv <- grepl("bivariate", method[1])
  # Ask each method's updating function for its prior/posterior quantiles
  # (calculatingSupport = TRUE short-circuits the full update).
  support_quantiles <- lapply(seq_along(samplesList), function(i) {
    sample <- samplesList[[i]]
    prior <- priors[[i]]
    #* `Check sample class`
    # Fix: scalar condition, so use short-circuit || (was elementwise |).
    if (is.matrix(sample) || is.data.frame(sample)) {
      vec_suffix <- "mv"
    } else if (is.vector(sample)) {
      vec_suffix <- "sv"
    }
    matched_fun <- get(paste0(".conj_", method[i], "_", vec_suffix))
    qnts <- matched_fun(s1 = sample, priors = prior, calculatingSupport = TRUE)
    return(qnts)
  })
  if (biv) {
    # Bivariate: a shared support grid per parameter, spanning both samples.
    pars <- names(support_quantiles[[1]])
    support <- lapply(pars, function(param) {
      qnts <- range(unlist(lapply(support_quantiles, function(sq) {
        sq[[param]]
      })))
      seq(qnts[1], qnts[2], length.out = 10000)
    })
    names(support) <- pars
  } else {
    # Univariate: one shared grid covering every sample's quantiles.
    qnts <- range(unlist(support_quantiles))
    support <- seq(qnts[1], qnts[2], length.out = 10000)
  }
  return(support)
}
|
| 500 | ||
| 501 | ||
| 502 | ||
| 503 |
#' *********************************************************************************************** |
|
| 504 |
#' *************** `ROPE testing on two conjugateHelper outputs` *********************************** |
|
| 505 |
#' *********************************************************************************************** |
|
| 506 |
#' |
|
| 507 |
#' this should take outputs from conjHelpers and compare the $posteriorDraws. |
|
| 508 |
#' @keywords internal |
|
| 509 |
#' @noRd |
|
| 510 |
.conj_rope <- function(sample_results, rope_range = c(-0.1, 0.1),
                       rope_ci = 0.89, plot, method) {
  #* `if bivariate then call the bivariate option`
  # The bivariate path unpacks each parameter and recurses back here
  # with method = "NONE".
  if (any(grepl("bivariate", method))) {
    return(.conj_bivariate_rope(
      sample_results, rope_range,
      rope_ci, plot, method
    ))
  }
  #* `ROPE Comparison`
  rope_res <- list()
  if (is.null(rope_range)) {
    return(rope_res)
  }
  if (length(rope_range) != 2) {
    stop("rope must be a vector of length 2")
  }
  draws1 <- sample_results[[1]]$posteriorDraws
  if (length(sample_results) == 2) {
    # Two samples: test the distribution of differences.
    draws2 <- sample_results[[2]]$posteriorDraws
    if (any(grepl("vonmises", method))) {
      # Circular data: differences wrap at the distribution boundary.
      boundary <- sample_results[[1]]$posterior$boundary
      posterior <- .conj_rope_circular_diff(draws1, draws2, boundary = boundary)
    } else {
      posterior <- draws1 - draws2
    }
  } else {
    # One sample: test its posterior directly.
    posterior <- draws1
  }
  hdi_diff <- as.numeric(bayestestR::hdi(posterior, ci = rope_ci))[2:3]
  hde_diff <- median(posterior)
  rope_prob <- as.numeric(bayestestR::rope(posterior,
    range = rope_range,
    ci_method = "HDI", ci = rope_ci
  ))
  rope_res$summary <- data.frame(
    HDE_rope = hde_diff, HDI_rope_low = hdi_diff[1],
    HDI_rope_high = hdi_diff[2], rope_prob = rope_prob
  )
  if (plot) {
    # Keep the raw draws for the histogram panel.
    rope_res$rope_df <- data.frame("X" = posterior)
  }
  return(rope_res)
}
|
| 557 | ||
| 558 |
#' *********************************************************************************************** |
|
| 559 |
#' *************** `ROPE testing on two conjugate_bivariate_Helper outputs` ******************** |
|
| 560 |
#' *********************************************************************************************** |
|
| 561 |
#' |
|
| 562 |
#' this should take outputs from conjHelpers and compare the $posteriorDraws. |
|
| 563 |
#' @keywords internal |
|
| 564 |
#' @noRd |
|
| 565 |
.conj_bivariate_rope <- function(sample_results, rope_range = c(-0.1, 0.1),
                                 rope_ci = 0.89, plot, method) {
  #* `Format rope_range`
  params <- sample_results[[1]]$summary$param
  # A single rope_range vector is recycled across every parameter.
  if (!is.list(rope_range)) {
    recycled <- lapply(params, function(par) {
      rope_range
    })
    names(recycled) <- params
    rope_range <- recycled
  }
  #* `ROPE Comparison`
  # Subset each sample's results down to one parameter and run the
  # univariate ROPE test on it (method = "NONE" avoids re-dispatch).
  rope_res <- lapply(names(rope_range), function(nm) {
    par_range <- rope_range[[nm]]
    per_param_results <- lapply(sample_results, function(s_res) {
      s_res$posteriorDraws <- s_res$posteriorDraws[[nm]]
      s_res$pdf <- s_res$pdf[[nm]]
      s_res$summary <- s_res$summary[s_res$summary$param == nm, ]
      s_res$plot_df <- s_res$plot_df[s_res$plot_df$param == nm, ]
      s_res
    })
    .conj_rope(per_param_results,
      rope_range = par_range,
      rope_ci = rope_ci, plot, method = "NONE"
    )
  })
  # Stack the per-parameter summaries (computed before $summary is appended).
  rope_res$summary <- do.call(rbind, lapply(rope_res, function(r) {
    r$summary
  }))
  return(rope_res)
}
|
| 595 | ||
| 596 |
#' *********************************************************************************************** |
|
| 597 |
#' *************** `Handle PDFs for testing` *********************************** |
|
| 598 |
#' *********************************************************************************************** |
|
| 599 |
#' @keywords internal |
|
| 600 |
#' @noRd |
|
| 601 | ||
| 602 |
.pdf.handling <- function(pdf1, pdf2, hypothesis) {
  # MV results arrive as lists of per-dimension pdfs: compare element-wise
  # and stack one row per dimension. SV results are plain vectors.
  if (is.list(pdf1) && is.list(pdf2)) {
    rows <- lapply(seq_along(pdf1), function(i) {
      .post.prob.from.pdfs(pdf1[[i]], pdf2[[i]], hypothesis)
    })
    pdf.handling.output <- as.data.frame(do.call(rbind, rows))
  } else {
    pdf.handling.output <- as.data.frame(.post.prob.from.pdfs(pdf1, pdf2, hypothesis))
  }
  return(pdf.handling.output)
}
|
| 612 | ||
| 613 |
#' *********************************************************************************************** |
|
| 614 |
#' *************** `Calculate Posterior Probability given PDFs` *********************************** |
|
| 615 |
#' *********************************************************************************************** |
|
| 616 | ||
| 617 |
#' @keywords internal |
|
| 618 |
#' @noRd |
|
| 619 |
.post.prob.from.pdfs <- function(pdf1, pdf2, hypothesis) {
  # Overlap of two discretized pdfs = sum of their pointwise minimum.
  # pmin() replaces the original row-wise apply(cbind(...), MARGIN = 1, min):
  # identical values (NA where either pdf is NA, then dropped by na.rm),
  # but vectorized instead of looping over rows.
  if (hypothesis == "unequal") {
    post.prob <- 1 - sum(pmin(pdf1, pdf2), na.rm = TRUE)
    dirSymbol <- "!="
  } else if (hypothesis == "equal") {
    post.prob <- sum(pmin(pdf1, pdf2), na.rm = TRUE)
    dirSymbol <- "="
  } else if (hypothesis == "lesser") { # note one sided testing is less tested generally
    direction <- pdf1 <= pdf2
    post.prob <- sum(pdf1 * direction, na.rm = TRUE)
    dirSymbol <- "<"
  } else if (hypothesis == "greater") {
    direction <- pdf1 >= pdf2
    post.prob <- sum(pdf1 * direction, na.rm = TRUE)
    dirSymbol <- ">"
  } else {
    stop("hypothesis must be either unequal, equal, lesser, or greater")
  }
  return(list("post.prob" = post.prob, "direction" = dirSymbol))
}
|
| 639 | ||
| 640 |
#' *********************************************************************************************** |
|
| 641 |
#' *************** `General Plotting Function` *********************************** |
|
| 642 |
#' *********************************************************************************************** |
|
| 643 |
#' Used to pick which kind of plotting function to use. |
|
| 644 |
#' @keywords internal |
|
| 645 |
#' @noRd |
|
| 646 | ||
| 647 |
.conj_plot <- function(sample_results, rope_res = NULL, res,
                       rope_range, rope_ci, dirSymbol = NULL, support, method) {
  # Route to the joint-distribution plotter for bivariate methods,
  # otherwise to the standard univariate density plotter.
  bivariate <- any(grepl("bivariate", method))
  if (bivariate) {
    .conj_bivariate_plot(sample_results, rope_res, res, rope_range, rope_ci, dirSymbol)
  } else {
    .conj_general_plot(sample_results, rope_res, res, rope_range, rope_ci, dirSymbol, support)
  }
}
|
| 655 | ||
| 656 |
#' *********************************************************************************************** |
|
| 657 |
#' *************** `General Plotting Function` *********************************** |
|
| 658 |
#' *********************************************************************************************** |
|
| 659 | ||
| 660 | ||
| 661 |
#' @keywords internal |
|
| 662 |
#' @noRd |
|
| 663 |
.conj_general_plot <- function(sample_results, rope_res = NULL, res,
                               rope_range, rope_ci, dirSymbol = NULL, support) {
  # Base panel: sample 1 posterior density with HDI bounds (solid red)
  # and HDE (dashed red).
  df1 <- sample_results[[1]]$plot_df
  plt <- ggplot2::ggplot(df1, ggplot2::aes(x = .data$range, y = .data$prob)) +
    ggplot2::geom_area(data = df1, alpha = 0.5, ggplot2::aes(fill = "s1")) +
    ggplot2::geom_vline(ggplot2::aes(xintercept = res$summary$HDI_1_low),
      color = "red",
      linewidth = 1.1
    ) +
    ggplot2::geom_vline(ggplot2::aes(xintercept = res$summary$HDE_1),
      color = "red", linetype = "dashed", linewidth = 1.1
    ) +
    ggplot2::geom_vline(ggplot2::aes(xintercept = res$summary$HDI_1_high),
      color = "red",
      linewidth = 1.1
    ) +
    ggplot2::scale_fill_manual(values = "red") +
    ggplot2::labs(
      x = "Posterior Distribution of Random Variable", y = "Density", title = "Distribution of Samples",
      subtitle = paste0(
        "HDE: ", round(res$summary$HDE_1, 2),
        "\nHDI: [", round(res$summary$HDI_1_low, 2), ", ",
        round(res$summary$HDI_1_high, 2), "]"
      )
    ) +
    ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(alpha = 0.5))) +
    ggplot2::theme(
      legend.title = ggplot2::element_blank(),
      legend.position = "inside",
      legend.position.inside = c(0.9, 0.9)
    ) +
    pcv_theme()
  # Second panel layers: sample 2 density in blue plus a combined subtitle.
  if (length(sample_results) == 2) {
    df2 <- sample_results[[2]]$plot_df
    # Format the posterior probability, guarding against tiny values.
    if (res$summary$post.prob < 1e-5) {
      prob_text <- "<1e-5"
    } else {
      prob_text <- round(res$summary$post.prob, 5)
    }
    # Swap the single-color fill scale for a two-color one without
    # triggering ggplot's "replacing scale" message.
    fill_scale_idx <- which(sapply(plt$scales$scales, function(x) {
      "fill" %in% x$aesthetics # avoid "replacing scale" non-messages that suppress doesn't catch
    }))
    plt$scales$scales[[fill_scale_idx]] <- NULL
    plt <- plt +
      ggplot2::geom_area(data = df2, ggplot2::aes(fill = "s2"), alpha = 0.5) +
      ggplot2::geom_vline(ggplot2::aes(xintercept = res$summary$HDI_2_low),
        color = "blue",
        linewidth = 1.1
      ) +
      ggplot2::geom_vline(ggplot2::aes(xintercept = res$summary$HDE_2),
        color = "blue",
        linetype = "dashed", linewidth = 1.1
      ) +
      ggplot2::geom_vline(ggplot2::aes(xintercept = res$summary$HDI_2_high),
        color = "blue",
        linewidth = 1.1
      ) +
      ggplot2::scale_fill_manual(values = c("red", "blue"), breaks = c("s1", "s2")) +
      ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(alpha = 0.5))) +
      ggplot2::theme(
        legend.title = ggplot2::element_blank(),
        legend.position = "inside",
        legend.position.inside = c(0.9, 0.9)
      ) +
      ggplot2::labs(subtitle = paste0(
        "Sample 1: ", round(res$summary$HDE_1, 2), " [", round(res$summary$HDI_1_low, 2), ", ",
        round(res$summary$HDI_1_high, 2), "]\n",
        "Sample 2: ", round(res$summary$HDE_2, 2), " [", round(res$summary$HDI_2_low, 2), ", ",
        round(res$summary$HDI_2_high, 2), "]\n",
        "P[p1", dirSymbol, "p2] = ", prob_text
      ))
  }
  # ROPE panel: histogram of the posterior of differences, appended via
  # patchwork (ggplot + ggplot composes into a patchwork layout).
  if (!is.null(rope_res)) {
    diff_df <- rope_res$rope_df
    plt <- plt + ggplot2::ggplot(diff_df, ggplot2::aes(x = .data$X)) +
      ggplot2::geom_histogram(bins = 100, fill = "purple", color = "purple", alpha = 0.7) +
      ggplot2::geom_histogram(
        data = data.frame(
          "X" = diff_df[
            diff_df$X > rope_range[1] &
              diff_df$X < rope_range[2] &
              diff_df$X > res$summary$HDI_rope_low &
              diff_df$X < res$summary$HDI_rope_high,
          ]
        ),
        bins = 100, fill = "gray30", color = "gray30"
      ) +
      ggplot2::annotate("segment",
        x = rope_range[1], xend = rope_range[2], y = 0, yend = 0,
        linewidth = 2, color = "gray70"
      ) +
      ggplot2::geom_vline(ggplot2::aes(xintercept = res$summary$HDI_rope_low), linewidth = 0.7) +
      ggplot2::geom_vline(ggplot2::aes(xintercept = res$summary$HDE_rope),
        linetype = "dashed",
        linewidth = 0.7
      ) +
      ggplot2::geom_vline(ggplot2::aes(xintercept = res$summary$HDI_rope_high), linewidth = 0.7) +
      ggplot2::labs(
        x = "Posterior of Difference", y = "Frequency", title = "Distribution of Difference",
        subtitle = paste0(
          "Median Difference of ", round(res$summary$HDE_rope, 2), "\n",
          100 * rope_ci, "% CI [", round(res$summary$HDI_rope_low, 2), ", ",
          round(res$summary$HDI_rope_high, 2), "]\n",
          rope_ci, "% HDI in [", rope_range[1], ", ", rope_range[2], "]: ",
          round(res$summary$rope_prob, 2)
        )
      ) +
      pcv_theme() +
      ggplot2::theme(
        axis.title.y.left = ggplot2::element_blank(),
        axis.text.y.left = ggplot2::element_blank()
      ) +
      patchwork::plot_layout(widths = c(2, 1))
  }
  return(plt)
}
|
| 785 | ||
| 786 | ||
| 787 |
#' *********************************************************************************************** |
|
| 788 |
#' *************** `Bivariate Plotting Function` *********************************** |
|
| 789 |
#' *********************************************************************************************** |
|
| 790 |
#' |
|
| 791 |
#' @keywords internal |
|
| 792 |
#' @noRd |
|
| 793 | ||
| 794 |
.conj_bivariate_plot <- function(sample_results, rope_res = NULL, res, |
|
| 795 |
rope_range, rope_ci, dirSymbol = NULL, support) {
|
|
| 796 | 7x |
TITLE <- "Joint Posterior Distribution" |
| 797 | 7x |
if (length(sample_results) == 1) {
|
| 798 | 1x |
margin_plot_df <- sample_results[[1]]$plot_df |
| 799 | 1x |
params <- unique(margin_plot_df$param) |
| 800 |
#* `Make joint distribution plot` |
|
| 801 | 1x |
joint_dist_s1 <- sample_results[[1]]$posteriorDraws |
| 802 | 1x |
limits <- lapply(params, function(p) {
|
| 803 | 2x |
sub <- margin_plot_df[margin_plot_df$param == p, ] |
| 804 | 2x |
range(sub$range) |
| 805 |
}) |
|
| 806 | 1x |
names(limits) <- params |
| 807 | 1x |
x_lim <- limits[[1]] |
| 808 | 1x |
y_lim <- limits[[2]] |
| 809 | 1x |
v_lines <- res$summary[res$summary$param == params[1], ] |
| 810 | 1x |
h_lines <- res$summary[res$summary$param == params[2], ] |
| 811 | ||
| 812 | 1x |
joint_p <- ggplot2::ggplot( |
| 813 | 1x |
joint_dist_s1, |
| 814 | 1x |
ggplot2::aes( |
| 815 | 1x |
x = .data[[params[1]]], y = .data[[params[2]]], |
| 816 | 1x |
group = .data[["sample"]] |
| 817 |
) |
|
| 818 |
) + |
|
| 819 | 1x |
ggplot2::geom_hline( |
| 820 | 1x |
yintercept = h_lines$HDI_1_low, color = "gray80", linetype = 5, |
| 821 | 1x |
linewidth = 0.25 |
| 822 |
) + |
|
| 823 | 1x |
ggplot2::geom_hline( |
| 824 | 1x |
yintercept = h_lines$HDI_1_high, color = "gray80", linetype = 5, |
| 825 | 1x |
linewidth = 0.25 |
| 826 |
) + |
|
| 827 | 1x |
ggplot2::geom_vline( |
| 828 | 1x |
xintercept = v_lines$HDI_1_low, color = "gray80", linetype = 5, |
| 829 | 1x |
linewidth = 0.25 |
| 830 |
) + |
|
| 831 | 1x |
ggplot2::geom_vline( |
| 832 | 1x |
xintercept = v_lines$HDI_1_high, color = "gray80", linetype = 5, |
| 833 | 1x |
linewidth = 0.25 |
| 834 |
) + |
|
| 835 | 1x |
geom_density_2d_filled(breaks = ~ pretty(., n = 51)[-1], alpha = 0.9) + |
| 836 | 1x |
ggplot2::scale_fill_viridis_d(option = "plasma") + |
| 837 | 1x |
ggplot2::xlim(x_lim) + |
| 838 | 1x |
ggplot2::ylim(y_lim) + |
| 839 | 1x |
pcv_theme() + |
| 840 | 1x |
ggplot2::theme(legend.position = "none") |
| 841 |
#* `Make marginal distribution plot of each parameter (x, y)` |
|
| 842 | 1x |
margin_plots <- lapply(params, function(par) {
|
| 843 | 2x |
ggplot2::ggplot( |
| 844 | 2x |
margin_plot_df[margin_plot_df$param == par, ], |
| 845 | 2x |
ggplot2::aes(x = .data$range, y = .data$prob) |
| 846 |
) + |
|
| 847 | 2x |
ggplot2::geom_area( |
| 848 | 2x |
data = margin_plot_df[margin_plot_df$param == par, ], |
| 849 | 2x |
alpha = 0.5, ggplot2::aes(fill = "s1") |
| 850 |
) + |
|
| 851 | 2x |
ggplot2::geom_vline( |
| 852 | 2x |
data = data.frame(), |
| 853 | 2x |
ggplot2::aes( |
| 854 | 2x |
xintercept = res$summary[res$summary$param == par, "HDI_1_low"] |
| 855 |
), |
|
| 856 | 2x |
color = "red", |
| 857 | 2x |
linewidth = 0.5 |
| 858 |
) + |
|
| 859 | 2x |
ggplot2::geom_vline( |
| 860 | 2x |
data = data.frame(), |
| 861 | 2x |
ggplot2::aes( |
| 862 | 2x |
xintercept = res$summary[res$summary$param == par, "HDE_1"] |
| 863 |
), |
|
| 864 | 2x |
color = "red", linetype = "dashed", linewidth = 0.5 |
| 865 |
) + |
|
| 866 | 2x |
ggplot2::geom_vline( |
| 867 | 2x |
data = data.frame(), |
| 868 | 2x |
ggplot2::aes( |
| 869 | 2x |
xintercept = res$summary[res$summary$param == par, "HDI_1_high"] |
| 870 |
), |
|
| 871 | 2x |
color = "red", linetype = "dashed", linewidth = 0.5 |
| 872 |
) + |
|
| 873 | 2x |
ggplot2::scale_fill_manual(values = "red") + |
| 874 | 2x |
ggplot2::xlim(limits[[par]]) + |
| 875 | 2x |
ggplot2::theme_void() + |
| 876 | 2x |
ggplot2::theme(legend.title = ggplot2::element_blank()) |
| 877 |
}) |
|
| 878 |
#* `Write title if there is only 1 sample` |
|
| 879 | 1x |
SUBTITLE <- NULL |
| 880 | 6x |
} else if (length(sample_results) == 2) {
|
| 881 |
#* `Make plots for sample 2 if it exists` |
|
| 882 | 6x |
margin_plot_df <- do.call(rbind, lapply(1:2, function(i) {
|
| 883 | 12x |
md <- sample_results[[i]]$plot_df |
| 884 | 12x |
md$sample <- paste0("Sample ", i)
|
| 885 | 12x |
md |
| 886 |
})) |
|
| 887 | 6x |
params <- unique(margin_plot_df$param) |
| 888 | 6x |
joint_dist <- do.call(rbind, lapply(1:2, function(i) {
|
| 889 | 12x |
pd <- sample_results[[i]]$posteriorDraws |
| 890 | 12x |
pd$sample <- paste0("Sample ", i)
|
| 891 | 12x |
pd |
| 892 |
})) |
|
| 893 |
#* `Define Limits` |
|
| 894 | 6x |
limits <- lapply(params, function(p) {
|
| 895 | 12x |
sub <- margin_plot_df[margin_plot_df$param == p, ] |
| 896 | 12x |
range(sub$range) |
| 897 |
}) |
|
| 898 | 6x |
names(limits) <- params |
| 899 | 6x |
x_lim <- limits[[1]] |
| 900 | 6x |
y_lim <- limits[[2]] |
| 901 |
#* `Make joint distribution plot` |
|
| 902 | 6x |
v_lines <- res$summary[res$summary$param == params[1], ] |
| 903 | 6x |
h_lines <- res$summary[res$summary$param == params[2], ] |
| 904 | ||
| 905 | 6x |
joint_p <- ggplot2::ggplot( |
| 906 | 6x |
joint_dist, |
| 907 | 6x |
ggplot2::aes( |
| 908 | 6x |
x = .data[[params[1]]], y = .data[[params[2]]], |
| 909 | 6x |
group = .data[["sample"]] |
| 910 |
) |
|
| 911 |
) + |
|
| 912 | 6x |
ggplot2::geom_hline( |
| 913 | 6x |
yintercept = h_lines$HDI_1_low, color = "gray80", linetype = 5, |
| 914 | 6x |
linewidth = 0.25 |
| 915 |
) + |
|
| 916 | 6x |
ggplot2::geom_hline( |
| 917 | 6x |
yintercept = h_lines$HDI_1_high, color = "gray80", linetype = 5, |
| 918 | 6x |
linewidth = 0.25 |
| 919 |
) + |
|
| 920 | 6x |
ggplot2::geom_hline( |
| 921 | 6x |
yintercept = h_lines$HDI_2_low, color = "gray80", linetype = 5, |
| 922 | 6x |
linewidth = 0.25 |
| 923 |
) + |
|
| 924 | 6x |
ggplot2::geom_hline( |
| 925 | 6x |
yintercept = h_lines$HDI_2_high, color = "gray80", linetype = 5, |
| 926 | 6x |
linewidth = 0.25 |
| 927 |
) + |
|
| 928 | 6x |
ggplot2::geom_vline( |
| 929 | 6x |
xintercept = v_lines$HDI_1_low, color = "gray80", linetype = 5, |
| 930 | 6x |
linewidth = 0.25 |
| 931 |
) + |
|
| 932 | 6x |
ggplot2::geom_vline( |
| 933 | 6x |
xintercept = v_lines$HDI_1_high, color = "gray80", linetype = 5, |
| 934 | 6x |
linewidth = 0.25 |
| 935 |
) + |
|
| 936 | 6x |
ggplot2::geom_vline( |
| 937 | 6x |
xintercept = v_lines$HDI_2_low, color = "gray80", linetype = 5, |
| 938 | 6x |
linewidth = 0.25 |
| 939 |
) + |
|
| 940 | 6x |
ggplot2::geom_vline( |
| 941 | 6x |
xintercept = v_lines$HDI_2_high, color = "gray80", linetype = 5, |
| 942 | 6x |
linewidth = 0.25 |
| 943 |
) + |
|
| 944 | 6x |
geom_density_2d_filled(breaks = ~ pretty(., n = 51)[-1], alpha = 0.9) + |
| 945 | 6x |
ggplot2::scale_fill_viridis_d(option = "plasma") + |
| 946 | 6x |
ggplot2::xlim(x_lim) + |
| 947 | 6x |
ggplot2::ylim(y_lim) + |
| 948 | 6x |
pcv_theme() + |
| 949 | 6x |
ggplot2::theme(legend.position = "none") |
| 950 | ||
| 951 |
#* `Make marginal distribution plot of each parameter (x, y)` |
|
| 952 | 6x |
margin_plots <- lapply(params, function(par) {
|
| 953 | 12x |
hdf <- res$summary |
| 954 | 12x |
hdf <- hdf[hdf$param == par, ] |
| 955 | 12x |
ggplot2::ggplot() + |
| 956 | 12x |
ggplot2::geom_area( |
| 957 | 12x |
data = margin_plot_df[margin_plot_df$param == par, ], |
| 958 | 12x |
alpha = 0.5, ggplot2::aes( |
| 959 | 12x |
x = .data$range, y = .data$prob, |
| 960 | 12x |
fill = .data$sample |
| 961 |
), |
|
| 962 | 12x |
position = "identity" |
| 963 |
) + |
|
| 964 | 12x |
ggplot2::geom_vline( |
| 965 | 12x |
data = data.frame(), |
| 966 | 12x |
ggplot2::aes( |
| 967 | 12x |
xintercept = hdf[, "HDI_1_low"] |
| 968 |
), |
|
| 969 | 12x |
color = "red", linetype = "dashed", |
| 970 | 12x |
linewidth = 0.5 |
| 971 |
) + |
|
| 972 | 12x |
ggplot2::geom_vline( |
| 973 | 12x |
data = data.frame(), |
| 974 | 12x |
ggplot2::aes( |
| 975 | 12x |
xintercept = hdf[, "HDE_1"] |
| 976 |
), |
|
| 977 | 12x |
color = "red", linewidth = 0.5 |
| 978 |
) + |
|
| 979 | 12x |
ggplot2::geom_vline( |
| 980 | 12x |
data = data.frame(), |
| 981 | 12x |
ggplot2::aes( |
| 982 | 12x |
xintercept = hdf[, "HDI_1_high"] |
| 983 |
), |
|
| 984 | 12x |
color = "red", linetype = "dashed", linewidth = 0.5 |
| 985 |
) + |
|
| 986 | 12x |
ggplot2::geom_vline( |
| 987 | 12x |
data = data.frame(), |
| 988 | 12x |
ggplot2::aes( |
| 989 | 12x |
xintercept = hdf[, "HDI_2_low"] |
| 990 |
), |
|
| 991 | 12x |
color = "blue", linetype = "dashed", |
| 992 | 12x |
linewidth = 0.5 |
| 993 |
) + |
|
| 994 | 12x |
ggplot2::geom_vline( |
| 995 | 12x |
data = data.frame(), |
| 996 | 12x |
ggplot2::aes( |
| 997 | 12x |
xintercept = hdf[, "HDE_2"] |
| 998 |
), |
|
| 999 | 12x |
color = "blue", linewidth = 0.5 |
| 1000 |
) + |
|
| 1001 | 12x |
ggplot2::geom_vline( |
| 1002 | 12x |
data = data.frame(), |
| 1003 | 12x |
ggplot2::aes( |
| 1004 | 12x |
xintercept = hdf[, "HDI_2_high"] |
| 1005 |
), |
|
| 1006 | 12x |
color = "blue", linetype = "dashed", linewidth = 0.5 |
| 1007 |
) + |
|
| 1008 | 12x |
ggplot2::scale_fill_manual(values = c("red", "blue")) +
|
| 1009 | 12x |
ggplot2::xlim(limits[[par]]) + |
| 1010 | 12x |
ggplot2::theme_void() + |
| 1011 | 12x |
ggplot2::theme( |
| 1012 | 12x |
legend.position = "inside", |
| 1013 | 12x |
legend.position.inside = c(0.1, 0.5), |
| 1014 | 12x |
legend.title = ggplot2::element_blank() |
| 1015 |
) |
|
| 1016 |
}) |
|
| 1017 | ||
| 1018 | 6x |
post.probs <- lapply(params, function(par) {
|
| 1019 | 12x |
hdf <- res$summary |
| 1020 | 12x |
hdf <- hdf[hdf$param == par, ] |
| 1021 | 12x |
if (hdf$post.prob < 1e-5) {
|
| 1022 | 1x |
post.prob.text <- "<1e-5" |
| 1023 |
} else {
|
|
| 1024 | 11x |
post.prob.text <- round(hdf$post.prob, 5) |
| 1025 |
} |
|
| 1026 | 12x |
return(post.prob.text) |
| 1027 |
}) |
|
| 1028 | 6x |
names(post.probs) <- params |
| 1029 | ||
| 1030 |
#* `Write title if there are 2 samples` |
|
| 1031 | 6x |
SUBTITLE <- paste(lapply(params, function(par) {
|
| 1032 | 12x |
paste0(par, ": P[s1", dirSymbol[[1]], "s2] = ", post.probs[[par]]) |
| 1033 | 6x |
}), collapse = "\n") |
| 1034 |
} |
|
| 1035 |
#* `Assemble Patchwork` |
|
| 1036 | 7x |
layout <- c( |
| 1037 | 7x |
patchwork::area(2, 1, 3, 2), |
| 1038 | 7x |
patchwork::area(1, 1, 1, 2), |
| 1039 | 7x |
patchwork::area(2, 3, 3, 3) |
| 1040 |
) |
|
| 1041 | 7x |
margin_plots[[2]] <- margin_plots[[2]] + |
| 1042 | 7x |
ggplot2::coord_flip() + |
| 1043 | 7x |
ggplot2::theme(legend.position = "none") |
| 1044 | ||
| 1045 | 7x |
p <- joint_p + margin_plots[[1]] + margin_plots[[2]] + |
| 1046 | 7x |
patchwork::plot_layout(design = layout) & |
| 1047 | 7x |
patchwork::plot_annotation(title = TITLE, subtitle = SUBTITLE) |
| 1048 | 7x |
return(p) |
| 1049 |
} |
| 1 |
#' Function to run a PCA, plot and optionally return the data with PCA coordinates and pca object
#'
#' @param df Dataframe to ordinate
#' @param cols columns to reduce dimensions of. Can be specified with names or positions.
#' If this is length of 1 then it is treated as regex pattern to match
#' the column names that should be used.
#' @param color column name(s) used to color points in the pca plot.
#' @param facet Optional column or vector to facet plots on.
#' @param returnData Logical, should data be returned?
#' @param ncp Optional, number of principal components to return attached
#' to dataframe if data is returned. Defaults to all.
#' @keywords pca
#' @details If data is returned then it will contain the coordinates from the
#' PCA and will not contain the columns that were reduced.
#'
#' @import ggplot2
#' @import FactoMineR
#' @importFrom stats as.formula
#' @return A ggplot or list with a ggplot, a dataframe with the data and PCs, and the factominer
#' PCA object as elements.
#' @examples
#'
#' dists <- list(
#'   rlnorm = list(meanlog = log(40), sdlog = 0.5),
#'   rnorm = list(mean = 60, sd = 10)
#' )
#' mv <- mvSim(
#'   dists = dists, n_samples = 100, counts = 1000,
#'   min_bin = 1, max_bin = 180, wide = TRUE
#' )
#' mv$otherGroup <- sample(c("a", "b"), size = nrow(mv), replace = TRUE)
#' pcadf(mv, cols = "sim_", returnData = TRUE)
#' pcadf(mv, cols = 2:181, color = c("group", "otherGroup"), returnData = FALSE)
#'
#' @export

pcadf <- function(df = NULL, cols = NULL, color = NULL,
                  facet = NULL, returnData = TRUE, ncp = NULL) {
  # A single character `cols` is treated as a regex over column names.
  if (is.character(cols) && length(cols) == 1) {
    cols <- which(grepl(cols, colnames(df)))
  }
  # Multiple color columns are collapsed into a single interaction column
  # (e.g. c("group", "rep") -> "group.rep") so ggplot gets one aesthetic.
  if (!is.null(color) && length(color) > 1) {
    df[[paste(color, collapse = ".")]] <- interaction(df[, color])
    color <- paste(color, collapse = ".")
  }
  # Default ncp: one less than the smaller dimension of the data being reduced.
  if (is.null(ncp)) {
    ncp <- min(dim(df[, cols])) - 1
  }
  pca <- FactoMineR::PCA(df[, cols], ncp = ncp, graph = FALSE)
  # Percent variance explained by the first two components (column 2 of $eig).
  pc1Var <- round(pca$eig[1, 2], 3)
  pc2Var <- round(pca$eig[2, 2], 3)
  # Keep only the individual coordinates and rename "coord.Dim.N" -> "pcN".
  coords <- as.data.frame(pca$ind)
  coords <- coords[, grepl("coord", colnames(coords))]
  colnames(coords) <- gsub("coord.Dim.", "pc", colnames(coords))
  # Ensure `cols` is numeric so that `df[, -cols]` below drops the reduced
  # columns by position.
  if (!is.numeric(cols)) {
    cols <- which(colnames(df) %in% cols)
  }
  # Bind the non-reduced columns to the PC coordinates, preserving the
  # original column names for the non-reduced part.
  pca.df <- cbind(as.data.frame(df[, -cols]), coords)
  colnames(pca.df)[1:(length(colnames(df)) - length(cols))] <- colnames(df)[-cols]
  facetLayer <- NULL
  if (!is.null(facet)) {
    facetLayer <- ggplot2::facet_wrap(as.formula(paste0("~", paste(facet, collapse = "+"))))
  }
  # With no color requested, add a constant column so the plotting helper can
  # still map a color aesthetic (its legend is suppressed downstream).
  if (is.null(color)) {
    pca.df$dummyVariableForColor <- 1
    color <- "dummyVariableForColor"
  }
  plots <- .pcaGeneralPlot(pca.df, color, facetLayer, pc1Var, pc2Var)
  if (returnData) {
    return(list("data" = pca.df, "pca" = pca, "plot" = plots))
  } else {
    return(plots)
  }
}
|
| 78 | ||
| 79 |
#' general pca plotting
#' @keywords internal
#' @noRd

.pcaGeneralPlot <- function(pca.df, color, facetLayer, pc1Var, pc2Var) {
  # Scatter of the first two principal components, colored by `color`, with
  # the percent variance explained shown in each axis label.
  x_axis_label <- paste0("PC 1 (", pc1Var, "%)")
  y_axis_label <- paste0("PC 2 (", pc2Var, "%)")
  p <- ggplot2::ggplot(
    pca.df,
    ggplot2::aes(x = .data$pc1, y = .data$pc2, color = .data[[color]])
  ) +
    ggplot2::geom_point() +
    ggplot2::labs(x = x_axis_label, y = y_axis_label) +
    pcv_theme()
  # The dummy color column is a placeholder added by pcadf, so its legend
  # carries no information and is hidden.
  if (color == "dummyVariableForColor") {
    p <- p + ggplot2::theme(legend.position = "none")
  }
  # facetLayer may be NULL; adding NULL to a ggplot is a no-op.
  p <- p + facetLayer
  return(p)
}
| 1 |
#' Function to reshape phenotype data to survival data based on some pcvrForm
#'
#' @param df a dataframe to use
#' @param form a formula describing the survival analysis model (see growthSS)
#' @param model The distribution to use (model from .survModelParser )
#'
#' @return A list including a dataframe and elements of the formula parsed.
#'
#' @examples
#'
#' df <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' form <- y > 100 ~ time | id / group
#' model = "weibull" # or binomial
#' .makeSurvData(df, form, model)
#'
#' @keywords internal
#' @noRd

.makeSurvData <- function(df, form, model = "weibull") {
  #* `general pcvr formula parsing`
  parsed_form <- .parsePcvrForm(form, df)
  y <- parsed_form$y
  x <- parsed_form$x
  individual <- parsed_form$individual
  group <- parsed_form$group
  df <- parsed_form$data
  #* `further survival formula steps`
  # Split "y > cutoff" into the response name and the numeric cutoff.
  y_var <- trimws(strsplit(y, ">|[|]")[[1]][1])
  y_condition <- as.numeric(trimws(strsplit(y, ">|[|]")[[1]][2]))
  # Binarize the response: 1 once the phenotype reaches the cutoff.
  # NOTE(review): the formula uses ">" but this comparison is ">=" — confirm
  # the boundary value is intentionally treated as an event.
  df[[y_var]] <- as.numeric(df[[y_var]] >= y_condition)

  # Temporary key identifying one individual within one group across time.
  df$remove_interaction <- interaction(df[[individual]], df[[group]])

  if (model == "weibull") {
    # Per individual: keep the first time the event occurred, or (if it never
    # occurred) the final observation marked as right-censored.
    out_df <- do.call(rbind, lapply(unique(df$remove_interaction), function(i) {
      sub <- df[df$remove_interaction == i, ]
      # Censored = still event-free at the last observed time in the data.
      sub$censor <- ifelse(sub[[x]] == max(df[[x]]) & sub[[y_var]] == 0, 1, 0)
      # min(c(..., Inf)) guards against individuals with no event rows.
      sub[sub$censor == 1 | sub[[x]] == min(c(sub[sub[[y_var]] == 1, x], Inf)), ]
    }))
    colnames(out_df)[which(colnames(out_df) == y_var)] <- "event"
  } else if (model == "binomial") {
    # Build a life-table style summary per timepoint and group.
    out_df <- do.call(rbind, lapply(unique(df[[x]]), function(time) {
      sub <- df[df[[x]] == time, ]
      # "Eligible" counts come from the previous timepoint.
      # NOTE(review): `time - 1` assumes consecutive integer timesteps — confirm.
      if (time != unique(df[[x]])[1]) {
        prev <- df[df[[x]] == time - 1, ]
      } else {
        prev <- sub
      }
      lt <- stats::setNames(
        aggregate(as.formula(paste0(y_var, " ~ ", group)), sub, sum),
        c("group", "n_events")
      )
      lt$n_no_event <- aggregate(as.formula(paste0(y_var, " ~ ", group)), sub, function(x) {
        sum(x == 0)
      })[, 2]
      lt$n_eligible <- aggregate(as.formula(paste0(y_var, " ~ ", group)), prev, length)[, 2]
      lt$pct_event <- lt$n_events / lt$n_eligible
      lt[[x]] <- time
      lt
    }))
    out_df[[x]] <- factor(out_df[[x]])
  }
  # Drop the temporary grouping key before returning.
  out_df <- out_df[, !grepl("remove_interaction", colnames(out_df))]
  ret <- list(
    "data" = out_df, "y_var" = y_var, "y_cutoff" = y_condition,
    "x" = x, "group" = group, "individual" = individual
  )
  return(ret)
}
| 1 |
#' Helper function to check groups in data.
#'
#' @param df Data frame to use.
#' @param group Set of variables to use in grouping observations.
#' These taken together should identify a unique plant (or unique plant at a unique angle) across time.
#' @return If there are duplicates in the grouping then this will return a message with code to start
#' checking the duplicates in your data.
#'
#' @examples
#'
#' df <- growthSim("linear",
#'   n = 10, t = 10,
#'   params = list("A" = c(2, 1.5))
#' )
#' checkGroups(df, c("time", "id", "group"))
#' df$time[12] <- 3
#' checkGroups(df, c("time", "id", "group"))
#'
#' @export

checkGroups <- function(df, group) {
  # Count rows per combination of the grouping variables; any count > 1 means
  # the grouping does not uniquely identify observations.
  tab <- table(interaction(df[, c(group)]))
  if (any(tab > 1)) {
    # Caller-side name of the data object, used to build copy-pasteable
    # code snippets in the diagnostic message below.
    dataname <- deparse(substitute(df))
    nms <- names(tab)[which(as.numeric(tab) > 1)]
    # Snippet showing all duplicated rows.
    dupString <- paste0(
      dataname,
      "[duplicated(interaction(",
      paste(paste0(dataname, "$", c(group)), collapse = ", "),
      ")),]"
    )
    # Snippet showing the first duplicated group level.
    firstDup <- paste0(
      dataname, "[interaction(",
      paste(paste0(
        dataname, "$",
        c(group)
      ), collapse = ", "), ")=='",
      nms[1], "',]"
    )
    # Fix: removed `eval(parse(text = dupString))`, which evaluated the
    # generated snippet only to discard the result — pure wasted work, and it
    # errored whenever `df` was supplied as an expression (e.g.
    # checkGroups(subset(d, x > 0), ...)), since the deparsed text was
    # re-evaluated outside the caller's context. The snippet is for the user
    # to run, not for this function.
    w <- paste0(
      "There are ", length(nms), " observations that are not uniquely identified.",
      "\nThe max number of duplicates is ",
      max(tab, na.rm = TRUE), ".\nRun `", dupString, "` to see the duplicated rows,\n",
      " or ", firstDup, " to see the first duplicated instance."
    )
    message(w)
  } else {
    message("Grouping is unique")
  }
}
| 1 |
#' Function to visualize hypotheses tested on brms models similar to those made using growthSS outputs.
#'
#'
#'
#' @param fit A brmsfit object or a list of brmsfit objects
#' @param params A list of parameters to use from the fit.
#' Defaults to NULL in which case all growth model parameters are used.
#' @param hyp A character string defining the hypothesis to be tested.
#' Defaults to "num/denom > 1.05". The "num" and "denom" names should be kept,
#' but the direction and magnitude can be changed.
#' @param compareX Which groups in the model should be compared as numerator options?
#' Defaults to NULL in which case a plot will not be made but the data will be returned.
#' @param againstY Which group in the model should be used as the
#' denominator (typically a control group to compare against)?
#' Defaults to NULL in which case a plot will not be made but the data will be returned.
#' @param group_sep A regex pattern to match the separation of grouping
#' terms in the models group term (see the formula argument of \code{\link{growthSS}}).
#' The default uses "[.]" to break on a single period.
#' @param groups_into A vector of column names to make after groups are split by group_sep.
#' If this or groups_sep are NULL then no groups are assumed.
#' @param x The variable to be plotted on the x axis (should be from groups_into).
#' @param facet The variable to be used to facet the
#' ggplot (should be another option from groups_into).
#' If left NULL then the plot will only be faceted by params.
#' Note that with the nature of againstY this faceting is often redundant
#' but it does add labels which are helpful for keeping results organized..
#' @param cores Optional number of cores to run hypotheses in parallel.
#' Defaults to 1 unless the "mc.cores" option is set.
#' @param returnData Logical, should data be returned?
#' This is treated as TRUE if a plot will not be generated but
#' otherwise defaults to FALSE.
#'
#' @keywords brms ggplot2
#'
#' @import ggplot2
#' @import viridis
#' @import parallel
#' @importFrom utils combn
#' @importFrom stats as.formula setNames
#'
#' @examples
#' \donttest{
#' set.seed(123)
#' simdf <- growthSim(
#'   "logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' ss <- growthSS(
#'   model = "logistic", form = y ~ time | id / group, sigma = "spline",
#'   list("A" = 130, "B" = 10, "C" = 3),
#'   df = simdf, type = "brms"
#' )
#'
#' fit <- fitGrowth(ss, backend = "cmdstanr", iter = 500, chains = 1, cores = 1)
#' brmViolin(fit,
#'   hyp = "num/denom>1.05",
#'   compareX = "a",
#'   againstY = "b", returnData = TRUE
#' )
#' }
#'
#' @return Returns a ggplot showing a brms model's posterior distributions
#' as violins and filled by posterior probability of some hypothesis.
#'
#' @export

brmViolin <- function(fit, params = NULL, hyp = "num/denom>1.05", compareX = "a", againstY = "b",
                      group_sep = "[.]", groups_into = c("group2"), x = NULL, facet = NULL,
                      cores = getOption("mc.cores", 1), returnData = FALSE) {
  #* `parse arguments`
  # NOTE(review): the roxygen says compareX/againstY default to NULL but the
  # signature defaults them to "a"/"b" — confirm which is intended.
  model <- fit
  brmViolinParseArgumentsRes <- .brmViolinParseArguments(
    compareX, againstY,
    model, params, group_sep,
    groups_into
  )
  compareFew <- brmViolinParseArgumentsRes[["compareFew"]]
  compareX <- brmViolinParseArgumentsRes[["compareX"]]
  model <- brmViolinParseArgumentsRes[["model"]]
  params <- brmViolinParseArgumentsRes[["params"]]
  useGroups <- brmViolinParseArgumentsRes[["useGroups"]]
  if (is.null(x)) {
    x <- groups_into[1]
  }
  #* `get draws`
  draws <- do.call(cbind, lapply(model, function(mod) { # extract draws for relevant parameters
    mdf <- as.data.frame(mod[["fit"]])
    # NOTE(review): "[A|B]" is a regex character class (matches "A", "|", or
    # "B"), not alternation; this works because growth parameter names are
    # single letters — confirm that assumption holds for all models used here.
    colPattern <- paste0("^b_[", paste0(params, collapse = "|"), "]")
    mdf <- mdf[, grepl(colPattern, colnames(mdf))]
    colnames(mdf) <- sub("^b_", "", colnames(mdf))
    mdf
  }))
  #* take "grouping" string from formula (pform) and using that with the parameter names I can find
  #* groups for any models?
  #* For models fit with only one group of many from data this would fail, I'll need another option
  #* for that.
  #* probably fine to build this out first then think about how that edge case works.
  # Pull the grouping term (second "+"-separated piece of the RHS) from the
  # first parameter's formula.
  group_string <- trimws(
    strsplit(
      as.character(model[[1]]$formula$pforms[[params[1]]])[3], "[+]"
    )[[1]]
  )[2]

  # Every group label seen in the draw column names.
  groupings <- unique(sub(paste0(".*", group_string), "", colnames(draws)))
  # All ordered pairs of groups, plus self-comparisons (g vs g).
  p1 <- combn(groupings, 2, simplify = FALSE)
  p2 <- lapply(p1, rev)
  p3 <- lapply(unique(groupings), function(g) c(g, g))
  comparisons <- c(p1, p2, p3)
  if (compareFew) {
    # Restrict to numerator %in% compareX against the single denominator.
    comparisons <- comparisons[unlist(lapply(comparisons, function(comp) {
      comp[1] %in% compareX & comp[2] == againstY
    }))]
  }

  # Strip the grouping term from column names, leaving "param_group" labels.
  colnames(draws) <- sub(group_string, "", colnames(draws))

  # Test `hyp` for every parameter x comparison, collecting posterior
  # probabilities. "num"/"denom" columns are filled per comparison.
  hyps_df <- do.call(rbind, lapply(params, function(param) {
    param_df <- do.call(rbind, parallel::mclapply(comparisons, function(comp) {
      num <- paste(param, comp[1], sep = "_")
      denom <- paste(param, comp[2], sep = "_")
      temp <- draws
      temp$num <- temp[[num]]
      temp$denom <- temp[[denom]]
      # `x` here shadows the outer plotting variable; it is local to this call.
      x <- as.data.frame(brms::hypothesis(temp, paste0(hyp))$h)
      x$param <- param
      x$num <- sub("grouping", "", comp[1])
      x$denom <- sub("grouping", "", comp[2])
      x[, c("Post.Prob", "param", "num", "denom")]
    }, mc.cores = cores))
    param_df
  }))

  # Bin posterior probabilities into ordered categories for the fill scale.
  hyps_df$discrete_post_prob <- factor(
    ifelse(hyps_df$Post.Prob >= 0.99, "A",
      ifelse(hyps_df$Post.Prob >= 0.95, "B",
        ifelse(hyps_df$Post.Prob >= 0.85, "C",
          ifelse(hyps_df$Post.Prob >= 0.75, "D", "E")
        )
      )
    ),
    levels = c("A", "B", "C", "D", "E"), ordered = TRUE
  )

  # Long format: one row per draw per parameter/group column.
  longdraw <- as.data.frame(data.table::melt(data.table::as.data.table(draws),
    measure.vars = colnames(draws), value.name = "draw"
  ))
  # assumes parameter names are single characters — TODO confirm.
  longdraw$param <- substr(longdraw$variable, 1, 1)
  longdraw$group <- sub(paste0("[", paste0(params, collapse = "|"), "]_"), "", longdraw$variable)
  if (useGroups) {
    # Split each group label into the columns named by groups_into.
    group_meta <- do.call(rbind, parallel::mclapply(longdraw$group,
      function(g) {
        setNames(
          data.frame(t(strsplit(g, group_sep)[[1]])),
          groups_into
        )
      },
      mc.cores = cores
    ))
    for (col in groups_into) {
      group_meta[[col]] <- factor(group_meta[[col]])
    }
    longdraw <- cbind(longdraw, group_meta)
  }
  # Attach each draw's hypothesis result (matching on numerator group).
  ldj <- merge(longdraw, hyps_df, by.x = c("group", "param"), by.y = c("num", "param"))

  # With no restricted comparison requested there is no plot; return the data.
  if (!compareFew) {
    return(ldj)
  }

  # Five plasma colors with decreasing alpha, one per probability bin.
  virPal <- unlist(lapply(c(1, 0.9, 0.75, 0.5, 0.25), function(i) viridis::plasma(1, 1, i)))

  # still need to separate "grouping" into x and y variables.

  if (is.null(facet)) {
    facet_layer <- ggplot2::facet_wrap(~param, scales = "free_y")
  } else {
    facet_layer <- ggplot2::facet_grid(as.formula(paste0("param~", facet)), scales = "free_y")
  }

  violinPlot <- ggplot2::ggplot(ldj, ggplot2::aes(
    x = .data[[x]], y = .data[["draw"]],
    fill = .data[["discrete_post_prob"]]
  )) +
    facet_layer +
    ggplot2::geom_violin() +
    # One dashed reference line per parameter at the denominator group's mean.
    lapply(unique(ldj$param), function(p) {
      x <- data.frame(param = p, mean = mean(ldj[ldj$param == p & ldj$group == ldj$denom, "draw"]))
      ggplot2::geom_hline(data = x, ggplot2::aes(yintercept = mean), linetype = 5, linewidth = 0.5)
    }) +
    ggplot2::scale_fill_manual(
      values = virPal, breaks = c("A", "B", "C", "D", "E"),
      labels = c(">99%", ">95%", ">85%", ">75%", "<75%"), drop = TRUE
    ) +
    ggplot2::labs(y = "Posterior Distribution", x = x, fill = "Discrete Posterior Probability") +
    pcv_theme() +
    ggplot2::theme(
      legend.position = "bottom", axis.text.x.bottom = ggplot2::element_text(angle = 0, hjust = 0.5),
      panel.border = ggplot2::element_rect(fill = NA)
    )
  if (returnData) {
    return(list("plot" = violinPlot, "data" = ldj))
  } else {
    return(violinPlot)
  }
}
|
| 207 | ||
| 208 | ||
| 209 |
#' Parse and normalize arguments for brmViolin
#' @keywords internal
#' @noRd
.brmViolinParseArguments <- function(compareX, againstY, model, params, group_sep, groups_into) {
  # Restricted comparisons are only active when both sides are supplied.
  compareFew <- FALSE
  if (!is.null(compareX) && !is.null(againstY)) {
    compareFew <- TRUE
    if (!againstY %in% compareX) {
      compareX <- c(compareX, againstY)
    } # make sure all violins will be filled if both are provided
  }

  if (any(unlist(lapply(model, class)) != "brmsfit")) {
    # if only one brmsfit is given then make a list of itself
    model <- list(model)
  }

  if (is.null(params)) { # if params aren't given then grab all
    nlPars <- names(model[[1]]$formula$pforms)
    # Fix: keep non-nuisance parameters with logical indexing. The previous
    # `nlPars[-which(grepl("sigma|nu", nlPars))]` dropped ALL parameters when
    # nothing matched, because `x[-integer(0)]` selects zero elements.
    params <- nlPars[!grepl("sigma|nu", nlPars)]
  }
  useGroups <- FALSE
  if (!is.null(group_sep) || !is.null(groups_into)) {
    useGroups <- TRUE
  }
  return(list(
    "compareFew" = compareFew,
    "compareX" = compareX,
    "model" = model,
    "params" = params,
    "useGroups" = useGroups
  ))
}
| 1 |
#' Helper function to parse distributional formulas and return components for fitting a brms model
#'
#' @param model a model passed from growthSS
#' @param x the x variable
#' @param y the response variable
#' @param group the grouping variable
#' @param sigma Logical, is this a distributional parameter? Defaults to TRUE here.
#' @param nTimes Number of unique x values. Used for splines, if nTimes is too low then the spline knots
#' must be adjusted. Defaults to 25.
#' @param useGroup Logical, should groups be used?
#' @param priors Priors in growthSS syntax, passed to .brmsChangePointHelper for thresholded models.
#' @param int logical, should an intercept be included in the model?
#'
#' @return A list of elements to pass brmSS for fitting distributional models
#'
#'
#' @keywords internal
#' @noRd

.brmDparHelper <- function(dpar, model, x, group, nTimes, useGroup, priors, int = FALSE) {
  spline_helper_form <- NULL
  if (grepl("\\+", model)) {
    # A "+" in the model string marks a changepoint/thresholded model, which
    # is delegated wholesale to the changepoint helper.
    chngpt_res <- .brmsChangePointHelper(model, x,
      y = dpar, group, dpar = TRUE,
      nTimes, useGroup, priors, int = int
    )
    dpar_form <- chngpt_res$growthForm
    dpar_pars <- chngpt_res$pars
    spline_helper_form <- chngpt_res$splineHelperForm
  } else {
    # Recode alternate model names, then dispatch to the matching
    # .brms_form_<model> constructor by name.
    model <- switch(model,
      "homo" = "int",
      "spline" = "gam",
      model
    )
    form_builder <- match.fun(paste0(".brms_form_", gsub(" ", "", model)))
    form_res <- form_builder(x, dpar, group,
      dpar = TRUE, nTimes = nTimes,
      useGroup = useGroup, prior = priors, int = int
    )
    dpar_form <- form_res$form
    dpar_pars <- form_res$pars
  }
  return(list(
    "dparForm" = dpar_form, "dpar_pars" = dpar_pars,
    "dparSplineHelperForm" = spline_helper_form
  ))
}
| 1 |
#' @description
#' Internal function for calculating the pareto distribution of the upper boundary of a uniform
#' distribution represented by single value traits.
#' @param s1 A vector of numerics drawn from a uniform distribution.
#' @examples
#' set.seed(123)
#' s1 = stats::runif(10, -1, 10)
#' out <- .conj_bivariate_uniform_sv(
#'   s1 = s1, cred.int.level = 0.95,
#'   plot = TRUE
#' )
#' lapply(out, head)
#'
#' @keywords internal
#' @noRd

.conj_bivariate_uniform_sv <- function(s1 = NULL, priors = NULL,
                                       plot = FALSE, support = NULL, cred.int.level = NULL,
                                       calculatingSupport = FALSE) {
  out <- list()
  #* `make default prior if none provided`
  #* conjugate prior needs r1, r2, and alpha
  #* which are locations and a shared shape (scale) parameter
  if (is.null(priors)) {
    priors <- list(location_l = 1, location_u = 2, scale = 1)
  }
  #* `Update bivariate pareto prior with sufficient statistics`
  # Conjugate update: locations move to the sample extrema (if more extreme
  # than the prior locations) and the shape grows by the sample size.
  location_u_prime <- max(c(s1, priors$location_u[1]))
  location_l_prime <- min(c(s1, priors$location_l[1]))
  scale_prime <- priors$scale[1] + length(s1)
  #* `Define bivariate support if it is missing`
  #* `this will require some thought since there are two directions.`
  #* first problem is that qpareto requires the parameters to be > 0,
  #* but the lower boundary can be negative, or could be something like 1
  #* with a tail that becomes negative. I guess changing the center is the
  #* way to account for those potential problems.
  if (is.null(support)) {
    # NOTE(review): these qpareto calls have no extraDistr:: prefix unlike the
    # calls below — presumably resolved via the package namespace; confirm.
    (quantiles_u <- qpareto(c(0.0001, 0.9999), scale_prime, abs(location_u_prime)))
    (quantiles_l <- qpareto(c(0.0001, 0.9999), scale_prime, abs(location_l_prime)))
    support_l <- seq(quantiles_l[1], quantiles_l[2], length.out = 10000)
    support_u <- seq(quantiles_u[1], quantiles_u[2], length.out = 10000)

    # Negative locations: the Pareto was fit on |location|, so reflect the
    # quantiles/support back onto the negative axis (reversed to keep order).
    if (location_l_prime < 0) {
      quantiles_l <- -1 * rev(quantiles_l)
      support_l <- seq(quantiles_l[1], quantiles_l[2], length.out = 10000)
    }
    if (location_u_prime < 0) {
      quantiles_u <- -1 * rev(quantiles_u)
      support_u <- seq(quantiles_u[1], quantiles_u[2], length.out = 10000)
    }

    # Early exit used by the caller to establish a common support first.
    if (calculatingSupport) {
      return(list("A" = quantiles_l, "B" = quantiles_u))
    }
  } else {
    support_l <- support$A
    support_u <- support$B
  }
  #* `Make Posterior Draws`
  out$posteriorDraws <- .conj_cond_inv_rpareto(
    10000, location_l_prime, location_u_prime,
    scale_prime
  )
  #* `posterior`
  #* this also needs to handle the possibility of negative locations
  if (location_l_prime < 0) {
    dens_l <- extraDistr::dpareto(-1 * support_l, scale_prime, abs(location_l_prime))
  } else {
    dens_l <- extraDistr::dpareto(support_l, scale_prime, location_l_prime)
  }

  if (location_u_prime < 0) {
    dens_u <- extraDistr::dpareto(-1 * support_u, scale_prime, abs(location_u_prime))
  } else {
    dens_u <- extraDistr::dpareto(support_u, scale_prime, location_u_prime)
  }

  # Normalize the densities over the discretized support.
  pdf_l <- dens_l / sum(dens_l)
  pdf_u <- dens_u / sum(dens_u)
  out$pdf <- list("A" = pdf_l, "B" = pdf_u)

  # Highest density estimates are the Pareto modes, i.e. the locations.
  hde_l <- location_l_prime
  hde_u <- location_u_prime

  # Equal-tailed credible intervals, reflected for negative locations.
  if (location_l_prime < 0) {
    hdi_l <- -1 * rev(extraDistr::qpareto(
      c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
      scale_prime, abs(location_l_prime)
    ))
  } else {
    hdi_l <- extraDistr::qpareto(
      c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
      scale_prime, location_l_prime
    )
  }
  if (location_u_prime < 0) {
    hdi_u <- -1 * rev(extraDistr::qpareto(
      c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
      scale_prime, abs(location_u_prime)
    ))
  } else {
    hdi_u <- extraDistr::qpareto(
      c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
      scale_prime, location_u_prime
    )
  }

  #* `Store summary`
  out$summary <- data.frame(
    HDE_1 = c(hde_l, hde_u),
    HDI_1_low = c(hdi_l[1], hdi_u[1]),
    HDI_1_high = c(hdi_l[2], hdi_u[2]),
    param = c("A", "B")
  )
  out$posterior <- list(
    "scale" = scale_prime, "location_l" = location_l_prime,
    "location_u" = location_u_prime
  )
  #* `save s1 data for plotting`
  # assumes support_l and support_u have equal length (both 10000 when built
  # here) — TODO confirm for caller-supplied `support`.
  if (plot) {
    out$plot_df <- data.frame(
      "range" = c(support_l, support_u),
      "prob" = c(pdf_l, pdf_u),
      "param" = rep(c("A", "B"), each = length(support_u)),
      "sample" = rep("Sample 1", 2 * length(support_u))
    )
  }
  return(out)
}
|
#' @description
#' Internal function for conditional inverse-transform sampling from a bivariate pareto.
#' @examples
#' .conj_cond_inv_rpareto(10, 1, 2, 10)
#'
#' @keywords internal
#' @noRd
.conj_cond_inv_rpareto <- function(n, r1, r2, scale) {
  # Draw x2 from its pareto marginal by inverting the pareto quantile
  # function at uniform noise.
  p2 <- stats::runif(n, min = 0, max = 1)
  x2 <- r2 / p2^(1 / scale)
  # Draw x1 from the conditional x1 | x2, a displaced-origin pareto,
  # again via its quantile function.
  p1 <- stats::runif(n, min = 0, max = 1)
  x1 <- r1 + (r1 / r2) * x2 * (1 / p1^(1 / (scale + 1)) - 1)
  cbind.data.frame("A" = x1, "B" = x2)
}
|
#' @description
#' Internal function for calculating the pareto distribution of the upper boundary of a uniform
#' distribution represented by multi value traits.
#' @param s1 A data.frame or matrix of multi value traits (rows are samples, columns are
#' histogram bins whose names encode the numeric bin value).
#' @examples
#' s1 <- mvSim(
#'   dists = list(runif = list(min = 15, max = 150)),
#'   n_samples = 10,
#'   counts = 1000,
#'   min_bin = 1,
#'   max_bin = 180,
#'   wide = TRUE
#' )
#' out <- .conj_bivariate_uniform_mv(
#'   s1 = s1[, -1], cred.int.level = 0.95,
#'   priors = list(location_l = 50, location_u = 100, scale = 1),
#'   plot = FALSE
#' )
#' lapply(out, head)
#' @keywords internal
#' @noRd
.conj_bivariate_uniform_mv <- function(s1 = NULL, priors = NULL,
                                       plot = FALSE, support = NULL, cred.int.level = NULL,
                                       calculatingSupport = FALSE) {
  out <- list()
  #* `make default prior if none provided`
  #* conjugate prior needs r1, r2, and alpha
  #* which are locations and a shared shape (scale) parameter
  if (is.null(priors)) {
    priors <- list(location_l = 1, location_u = 2, scale = 1)
  }
  #* `Calculate Sufficient Statistics`
  #* `N observations`
  n_obs <- nrow(s1)
  #* `Max non-zero bin`
  # Per row: last column with a positive count, with the numeric bin value
  # parsed out of the column name; overall max is the largest observed bin.
  max_obs <- max(unlist(lapply(seq_len(n_obs), function(i) {
    col <- utils::tail(colnames(s1)[which(s1[i, ] > 0)], 1)
    as.numeric(gsub("[a-zA-Z]_*", "", col))
  })), na.rm = TRUE)
  #* `Min non-zero bin`
  # Same idea with the first positive column per row.
  min_obs <- min(unlist(lapply(seq_len(n_obs), function(i) {
    col <- colnames(s1)[which(s1[i, ] > 0)][1]
    as.numeric(gsub("[a-zA-Z]_*", "", col))
  })), na.rm = TRUE)
  #* `Update bivariate pareto prior with sufficient statistics`
  location_u_prime <- max(c(max_obs, priors$location_u[1]))
  location_l_prime <- min(c(min_obs, priors$location_l[1]))
  scale_prime <- priors$scale[1] + n_obs
  #* `Define bivariate support if it is missing`
  #* `this will require some thought since there are two directions.`
  #* first problem is that qpareto requires the parameters to be > 0,
  #* but the lower boundary can be negative, or could be something like 1
  #* with a tail that becomes negative. I guess changing the center is the
  #* way to account for those potential problems.
  if (is.null(support)) {
    # NOTE(review): qpareto is called unqualified here — presumably
    # extraDistr::qpareto via a package import; confirm against NAMESPACE.
    # Quantiles are computed on |location| and reflected below when the
    # location is negative.
    (quantiles_u <- qpareto(c(0.0001, 0.9999), scale_prime, abs(location_u_prime)))
    (quantiles_l <- qpareto(c(0.0001, 0.9999), scale_prime, abs(location_l_prime)))
    support_l <- seq(quantiles_l[1], quantiles_l[2], length.out = 10000)
    support_u <- seq(quantiles_u[1], quantiles_u[2], length.out = 10000)

    # Reflect the support about zero for negative locations.
    if (location_l_prime < 0) {
      quantiles_l <- -1 * rev(quantiles_l)
      support_l <- seq(quantiles_l[1], quantiles_l[2], length.out = 10000)
    }
    if (location_u_prime < 0) {
      quantiles_u <- -1 * rev(quantiles_u)
      support_u <- seq(quantiles_u[1], quantiles_u[2], length.out = 10000)
    }

    # Early exit when only the support range is requested.
    if (calculatingSupport) {
      return(list("A" = quantiles_l, "B" = quantiles_u))
    }
  } else {
    support_l <- support$A
    support_u <- support$B
  }
  #* `Make Posterior Draws`
  out$posteriorDraws <- .conj_cond_inv_rpareto(
    10000, location_l_prime, location_u_prime,
    scale_prime
  )
  #* `posterior`
  #* this also needs to handle the possibility of negative locations
  # Densities are evaluated on the reflected support when the location is
  # negative so dpareto always sees positive parameters.
  if (location_l_prime < 0) {
    dens_l <- extraDistr::dpareto(-1 * support_l, scale_prime, abs(location_l_prime))
  } else {
    dens_l <- extraDistr::dpareto(support_l, scale_prime, location_l_prime)
  }

  if (location_u_prime < 0) {
    dens_u <- extraDistr::dpareto(-1 * support_u, scale_prime, abs(location_u_prime))
  } else {
    dens_u <- extraDistr::dpareto(support_u, scale_prime, location_u_prime)
  }

  # Normalize densities to discrete PDFs over the support grid.
  pdf_l <- dens_l / sum(dens_l)
  pdf_u <- dens_u / sum(dens_u)
  out$pdf <- list("A" = pdf_l, "B" = pdf_u)

  # The pareto mode is its location parameter, so the HDE is the updated location.
  hde_l <- location_l_prime
  hde_u <- location_u_prime

  # HDIs: central credible interval, reflected (and reversed so low < high)
  # for negative locations.
  if (location_l_prime < 0) {
    hdi_l <- -1 * rev(extraDistr::qpareto(
      c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
      scale_prime, abs(location_l_prime)
    ))
  } else {
    hdi_l <- extraDistr::qpareto(
      c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
      scale_prime, location_l_prime
    )
  }
  if (location_u_prime < 0) {
    hdi_u <- -1 * rev(extraDistr::qpareto(
      c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
      scale_prime, abs(location_u_prime)
    ))
  } else {
    hdi_u <- extraDistr::qpareto(
      c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
      scale_prime, location_u_prime
    )
  }

  #* `Store summary`
  out$summary <- data.frame(
    HDE_1 = c(hde_l, hde_u),
    HDI_1_low = c(hdi_l[1], hdi_u[1]),
    HDI_1_high = c(hdi_l[2], hdi_u[2]),
    param = c("A", "B")
  )
  out$posterior <- list(
    "scale" = scale_prime, "location_l" = location_l_prime,
    "location_u" = location_u_prime
  )
  #* `save s1 data for plotting`
  # support_l and support_u have the same length (both 10000 when computed
  # here, or whatever the caller supplied), so length(support_u) is used for both.
  if (plot) {
    out$plot_df <- data.frame(
      "range" = c(support_l, support_u),
      "prob" = c(pdf_l, pdf_u),
      "param" = rep(c("A", "B"), each = length(support_u)),
      "sample" = rep("Sample 1", 2 * length(support_u))
    )
  }
  return(out)
}
#' @description
#' Internal function for calculating the pareto distribution of the upper boundary of a uniform
#' distribution represented by multi value traits.
#' @param s1 A data.frame or matrix of multi value traits.
#' @examples
#' s1 <- mvSim(
#'   dists = list(runif = list(min = 0, max = 150)),
#'   n_samples = 10,
#'   counts = 1000,
#'   min_bin = 1,
#'   max_bin = 180,
#'   wide = TRUE
#' )
#' out <- .conj_uniform_mv(s1, cred.int.level = 0.95)
#' lapply(out, head)
#' @importFrom utils tail
#'
#' @keywords internal
#' @noRd
.conj_uniform_mv <- function(s1 = NULL, priors = NULL,
                             plot = FALSE, support = NULL, cred.int.level = NULL,
                             calculatingSupport = FALSE) {
  res <- list()
  #* `make default prior if none provided`
  if (is.null(priors)) {
    priors <- list(scale = 0.5, location = 0.5)
  }
  #* `Reorder columns if they are not in the numeric order`
  bin_numbers <- as.numeric(sub("[a-zA-Z_.]+", "", colnames(s1)))
  s1 <- s1[, sort(bin_numbers, index.return = TRUE)$ix]
  #* `Calculate Sufficient Statistics`
  #* `N observations`
  n_obs <- nrow(s1)
  #* `Max non-zero bin`
  # Per row, take the last column with a positive count and parse the numeric
  # bin value out of its name; the overall max is the largest observed bin.
  row_max_bins <- lapply(seq_len(n_obs), function(row_i) {
    nonzero <- colnames(s1)[which(s1[row_i, ] > 0)]
    as.numeric(gsub("[a-zA-Z]_*", "", utils::tail(nonzero, 1)))
  })
  max_obs <- max(unlist(row_max_bins), na.rm = TRUE)
  #* `Update pareto prior with sufficient statistics`
  scale_prime <- priors$scale + n_obs
  location_prime <- max(c(max_obs, priors$location), na.rm = TRUE)
  #* `Define support if it is missing`
  if (is.null(support) && calculatingSupport) {
    return(qpareto(c(0.0001, 0.9999), scale_prime, location_prime))
  }
  #* `Make Posterior Draws`
  res$posteriorDraws <- extraDistr::rpareto(10000, scale_prime, location_prime)
  #* `posterior`
  dens <- extraDistr::dpareto(support, scale_prime, location_prime)
  post_pdf <- dens / sum(dens)
  res$pdf <- post_pdf
  # HDE is the midpoint of a very narrow (1%) HDI of the posterior draws.
  hde1 <- mean(as.numeric(hdi(res$posteriorDraws, ci = 0.01)[c("CI_low", "CI_high")]))
  # mode is location
  # mean is defined if scale > 1: scale_prime * location_prime / scale_prime - 1
  # median is location x root scale of 2
  tail_prob <- (1 - cred.int.level) / 2
  hdi1 <- extraDistr::qpareto(c(tail_prob, 1 - tail_prob), scale_prime, location_prime)
  #* `Store summary`
  res$summary <- data.frame(HDE_1 = hde1, HDI_1_low = hdi1[1], HDI_1_high = hdi1[2])
  res$posterior <- list("scale" = scale_prime, "location" = location_prime)
  #* `save s1 data for plotting`
  if (plot) {
    res$plot_df <- data.frame(
      "range" = support,
      "prob" = post_pdf,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(res)
}
|
#' @description
#' Internal function for calculating the pareto distribution of the upper boundary of a uniform
#' distribution represented by single value traits.
#' @param s1 A vector of numerics drawn from a uniform distribution.
#' @examples
#' out <- .conj_uniform_sv(
#'   s1 = runif(10, 0, 100), cred.int.level = 0.95,
#'   plot = FALSE
#' )
#' lapply(out, head)
#' @keywords internal
#' @noRd
.conj_uniform_sv <- function(s1 = NULL, priors = NULL,
                             plot = FALSE, support = NULL, cred.int.level = NULL,
                             calculatingSupport = FALSE) {
  res <- list()
  #* `make default prior if none provided`
  if (is.null(priors)) {
    priors <- list(scale = 0.5, location = 0.5)
  }
  #* `Update pareto prior with sufficient statistics`
  # Conjugate update: shape grows with the sample size, location is the
  # largest value seen between data and prior.
  scale_prime <- priors$scale + length(s1)
  location_prime <- max(c(s1, priors$location), na.rm = TRUE)
  #* `Define support if it is missing`
  if (is.null(support) && calculatingSupport) {
    return(qpareto(c(0.0001, 0.9999), scale_prime, location_prime))
  }
  #* `Make Posterior Draws`
  res$posteriorDraws <- extraDistr::rpareto(10000, scale_prime, location_prime)
  #* `posterior`
  dens <- extraDistr::dpareto(support, scale_prime, location_prime)
  post_pdf <- dens / sum(dens)
  res$pdf <- post_pdf
  # HDE is the midpoint of a very narrow (1%) HDI of the posterior draws.
  hde1 <- mean(as.numeric(hdi(res$posteriorDraws, ci = 0.01)[c("CI_low", "CI_high")]))
  # mode is location
  # mean is defined if scale > 1: scale_prime * location_prime / scale_prime - 1
  # median is location x root scale of 2
  tail_prob <- (1 - cred.int.level) / 2
  hdi1 <- extraDistr::qpareto(c(tail_prob, 1 - tail_prob), scale_prime, location_prime)
  #* `Store summary`
  res$summary <- data.frame(HDE_1 = hde1, HDI_1_low = hdi1[1], HDI_1_high = hdi1[2])
  res$posterior <- list("scale" = scale_prime, "location" = location_prime)
  #* `save s1 data for plotting`
  if (plot) {
    res$plot_df <- data.frame(
      "range" = support,
      "prob" = post_pdf,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(res)
}
#' Variance partitioning using Full Random Effects Models
#'
#' Variance partitioning for phenotypes (over time) using fully random effects models
#'
#' @param df Dataframe containing phenotypes and design variables, optionally over time.
#' @param des Design variables to partition variance for as a character vector.
#' @param phenotypes Phenotype column names (data is assumed to be in wide format)
#' as a character vector.
#' @param timeCol A column of the data that denotes time for longitudinal experiments.
#' If left NULL (the default) then all data is assumed to be from one timepoint.
#' @param cor Logical, should a correlation plot be made? Defaults to TRUE.
#' @param returnData Logical, should the data used to make plots be returned? Defaults to FALSE.
#' @param combine Logical, should plots be combined with patchwork?
#' Defaults to TRUE, which works well when there is a single timepoint being used.
#' @param markSingular Logical, should singular fits be marked in the variance explained plot?
#' This is FALSE by default but it is good practice to check with TRUE in some situations.
#' If TRUE this will add white markings to the plot where models had singular fits,
#' which is the most common problem with this type of model.
#' @param time If the data contains multiple timepoints then which should be used?
#' This can be left NULL which will use the maximum time if \code{timeCol} is specified.
#' If a single number is provided then that time value will be used.
#' Multiple numbers will include those timepoints.
#' The string "all" will include all timepoints.
#' Any other value raises an error.
#' @param ... Additional arguments passed to \code{lme4::lmer}.
#'
#' @import lme4
#' @import ggplot2
#' @importFrom scales percent_format
#' @importFrom stats as.formula na.omit
#' @import patchwork
#'
#' @return Returns either a plot (if returnData=FALSE) or a list with a plot and
#' data/a list of dataframes (depending on returnData and cor).
#'
#' @examples
#'
#'
#' library(data.table)
#' set.seed(456)
#' df <- data.frame(
#'   genotype = rep(c("g1", "g2"), each = 10),
#'   treatment = rep(c("C", "T"), times = 10),
#'   time = rep(c(1:5), times = 2),
#'   pheno1 = rnorm(20, 10, 1),
#'   pheno2 = sort(rnorm(20, 5, 1)),
#'   pheno3 = sort(runif(20))
#' )
#' out <- frem(df, des = "genotype", phenotypes = c("pheno1", "pheno2", "pheno3"), returnData = TRUE)
#' lapply(out, class)
#' frem(df,
#'   des = c("genotype", "treatment"), phenotypes = c("pheno1", "pheno2", "pheno3"),
#'   cor = FALSE
#' )
#' frem(df,
#'   des = "genotype", phenotypes = c("pheno1", "pheno2", "pheno3"),
#'   combine = FALSE, timeCol = "time", time = "all"
#' )
#' frem(df,
#'   des = "genotype", phenotypes = c("pheno1", "pheno2", "pheno3"),
#'   combine = TRUE, timeCol = "time", time = 1
#' )
#' frem(df,
#'   des = "genotype", phenotypes = c("pheno1", "pheno2", "pheno3"),
#'   cor = FALSE, timeCol = "time", time = 3:5, markSingular = TRUE
#' )
#'
#' @export

frem <- function(df, des, phenotypes, timeCol = NULL, cor = TRUE, returnData = FALSE, combine = TRUE,
                 markSingular = FALSE, time = NULL, ...) {
  # With no time column, treat the data as a single timepoint on a dummy axis
  # so the rest of the pipeline can assume a time variable exists.
  dummyX <- FALSE
  if (is.null(timeCol)) {
    timeCol <- "dummy_x_axis"
    df[[timeCol]] <- 1
    dummyX <- TRUE
  }
  #* `Make formulas`
  # Two design variables get main effects plus their interaction; any other
  # number gets main effects only (the "extended" layout, ext = TRUE).
  ext <- FALSE
  if (length(des) == 2) {
    ind_fmla <- paste0("(1|", des[1], ")+(1|", des[2], ")+(1|", des[1], ":", des[2], ")")
  } else {
    ind_fmla <- paste(paste0("(1|", des, ")"), collapse = "+")
    ext <- TRUE
  }
  #* `Find time and subset data`
  # LONGITUDINAL controls which plot layout .fremPlotHelper uses.
  if (is.null(time)) {
    dat <- na.omit(df[df[[timeCol]] == max(df[[timeCol]]), c(des, phenotypes, timeCol)])
    LONGITUDINAL <- FALSE
  } else if (is.numeric(time) && length(time) == 1) {
    dat <- na.omit(df[df[[timeCol]] == time, c(des, phenotypes, timeCol)])
    LONGITUDINAL <- FALSE
  } else if (is.numeric(time) && length(time) > 1) {
    LONGITUDINAL <- TRUE
    dat <- na.omit(df[df[[timeCol]] %in% time, c(des, phenotypes, timeCol)])
  } else if (length(time) == 1 && time == "all") {
    LONGITUDINAL <- TRUE
    dat <- na.omit(df[, c(des, phenotypes, timeCol)])
  } else {
    # Previously an unrecognized `time` fell through with `dat` never assigned,
    # producing a cryptic "object 'dat' not found" error downstream.
    stop("`time` must be NULL, numeric, or the string \"all\"", call. = FALSE)
  }

  #* `Partition Variance`

  H2 <- .partitionVarianceFrem(dat, timeCol, phenotypes, ind_fmla, ext, des, ...)

  if (!ext) {
    colnames(H2) <- c(des[1], des[2], "Interaction", "Unexplained", timeCol, "singular", "Phenotypes")
  } else {
    colnames(H2) <- c(des, "Unexplained", timeCol, "singular", "Phenotypes")
  }
  # Order phenotypes by unexplained variance at the latest timepoint.
  ordering <- H2[H2[[timeCol]] == max(H2[[timeCol]]), ]
  H2$Phenotypes <- ordered(H2$Phenotypes, levels = ordering$Phenotypes[order(ordering$Unexplained)])
  h2_melt <- data.frame(data.table::melt(data.table::as.data.table(H2), id = c("Phenotypes", "singular", timeCol)))

  if (!ext) {
    h2_melt$variable <- ordered(h2_melt$variable,
      levels = c("Unexplained", des[1], des[2], "Interaction")
    )
  } else {
    h2_melt$variable <- ordered(h2_melt$variable,
      levels = c("Unexplained", des)
    )
  }
  anova_dat <- h2_melt

  #* `Plot Variance`

  plotHelperOutputs <- .fremPlotHelper(
    LONGITUDINAL, anova_dat, markSingular, dummyX, timeCol, dat, phenotypes,
    cor, combine
  )
  plot <- plotHelperOutputs[["plot"]]
  x <- plotHelperOutputs[["x"]]
  # The helper may flip `cor` to FALSE (e.g. only one phenotype), so take it back.
  cor <- plotHelperOutputs[["cor"]]
  out <- .fremCollectOutputs(returnData, cor, H2, x, plot)
  return(out)
}
|
#' helper function to assemble FREM return values
#' @keywords internal
#' @noRd

.fremCollectOutputs <- function(returnData, cor, H2, x, plot) {
  # Without data requested the plot alone is the result.
  if (!returnData) {
    return(plot)
  }
  # With correlations the data slot holds both tables, otherwise just H2.
  payload <- if (cor) {
    list("variance" = H2, "cor" = x)
  } else {
    H2
  }
  return(list("plot" = plot, "data" = payload))
}
|
#' helper function to plot FREM results
#'
#' Builds the variance-explained plot (bar chart for a single timepoint,
#' stacked area over time otherwise), and optionally a Spearman correlation
#' plot of the phenotypes. Returns the plot(s), the melted correlation data,
#' and the possibly-updated `cor` flag.
#' @keywords internal
#' @noRd

.fremPlotHelper <- function(LONGITUDINAL, anova_dat, markSingular, dummyX, timeCol, dat, phenotypes,
                            cor, combine) {
  x <- NULL
  if (!LONGITUDINAL) {
    # Single timepoint: horizontal stacked bars of variance explained.
    p <- ggplot2::ggplot(data = anova_dat) +
      ggplot2::geom_col(ggplot2::aes(y = .data$Phenotypes, x = .data$value, fill = .data$variable)) +
      ggplot2::xlab("Variance Explained") +
      ggplot2::guides(fill = ggplot2::guide_legend(title = "", reverse = TRUE)) +
      ggplot2::theme_minimal() +
      ggplot2::scale_x_continuous(expand = c(0, 0, 0, 0), labels = scales::percent_format()) +
      ggplot2::theme(
        axis.text = ggplot2::element_text(size = 14),
        axis.title.y = ggplot2::element_blank()
      ) +
      ggplot2::theme(axis.ticks.length = ggplot2::unit(0.2, "cm")) +
      ggplot2::theme(legend.position = "bottom")

    if (markSingular) {
      # White open squares at the bar ends flag singular model fits.
      # (was bare `aes`; namespaced for consistency with the rest of the file)
      p <- p + ggplot2::geom_point(
        data = anova_dat[as.logical(anova_dat$singular) & anova_dat$variable == "Unexplained", ],
        ggplot2::aes(x = 0.99, y = .data$Phenotypes), color = "white", shape = 0
      )
    }
    if (dummyX) {
      p <- p + ggplot2::theme(axis.title.x.bottom = ggplot2::element_blank())
    }
  } else {
    # Longitudinal: stacked area of variance explained over time, per phenotype.
    # (was bare `ggplot`/`aes`; namespaced for consistency)
    p <- ggplot2::ggplot(data = anova_dat, ggplot2::aes(x = .data[[timeCol]], y = .data$value, fill = .data$variable)) +
      ggplot2::geom_area() +
      ggplot2::facet_wrap(~ .data$Phenotypes) +
      ggplot2::ylab("Variance Explained") +
      ggplot2::guides(fill = ggplot2::guide_legend(title = "", reverse = TRUE)) +
      ggplot2::scale_y_continuous(expand = c(0, 0, 0, 0), labels = scales::percent_format()) +
      ggplot2::scale_x_continuous(expand = c(0, 0, 0, 0), labels = ~ round(., 1)) +
      ggplot2::theme_minimal() +
      ggplot2::theme(
        axis.text.y = ggplot2::element_text(size = 10),
        axis.title.y = ggplot2::element_blank(),
        legend.position = "bottom"
      )
    if (markSingular) {
      # Dashed white vertical lines mark timepoints with singular fits.
      p <- p + ggplot2::geom_vline(
        data = anova_dat[anova_dat$variable == "Unexplained" & as.logical(anova_dat$singular), ],
        ggplot2::aes(xintercept = .data[[timeCol]]), color = "white", linetype = 5, linewidth = 0.1
      )
    }
    if (dummyX) {
      p <- p + ggplot2::theme(axis.title.x.bottom = ggplot2::element_blank())
    }
  }

  #* `Get Correlations`
  # A correlation plot needs at least two phenotypes.
  if (length(phenotypes) == 1) {
    cor <- FALSE
  }
  if (cor) {
    # The `cor` argument (a logical) shadows stats::cor; the original relied on
    # R skipping non-function bindings when resolving `cor(...)`. Namespacing
    # makes the intent explicit without changing behavior.
    corr <- stats::cor(apply(dat[, (colnames(dat) %in% phenotypes)], 2, as.numeric),
      use = "complete.obs", method = "spearman"
    )
    # Order the correlation matrix by unexplained variance at the last timepoint
    # so it matches the variance plot's phenotype ordering.
    unexp <- anova_dat[anova_dat$variable == "Unexplained" &
      anova_dat[[timeCol]] == max(anova_dat[[timeCol]]), ]
    corr <- corr[
      as.character(unexp$Phenotypes[order(unexp$value)]),
      as.character(unexp$Phenotypes[order(unexp$value)])
    ]
    x <- na.omit(
      as.data.frame(
        suppressWarnings(data.table::melt(data.table::as.data.table(corr), variable.name = "Var2"))
      )
    )
    x$Var1 <- rep(rownames(corr), length.out = nrow(x))

    x$Var1 <- ordered(x$Var1, levels = unexp$Phenotypes[order(unexp$value)])
    x$Var2 <- ordered(x$Var2, levels = unexp$Phenotypes[order(unexp$value)])

    p2 <- ggplot2::ggplot(x, ggplot2::aes(.data$Var1, .data$Var2)) +
      ggplot2::geom_point(ggplot2::aes(color = .data$value), size = 4) +
      ggplot2::scale_color_gradient2(
        limits = c(-1, 1),
        midpoint = 0
      ) +
      ggplot2::theme_minimal() +
      ggplot2::guides(color = ggplot2::guide_colourbar(barwidth = 15, title = NULL)) +
      ggplot2::labs(x = "Correlations", y = "") +
      ggplot2::theme(
        legend.position = "bottom",
        axis.text.x.bottom = ggplot2::element_text(angle = 90, hjust = 1)
      )
    if (combine) {
      # When shown beside the variance plot the y labels would be redundant.
      p2 <- p2 + ggplot2::theme(axis.text.y.left = ggplot2::element_blank())
    }
  }

  if (cor) {
    if (combine) {
      plot <- p + p2
    } else {
      plot <- list(p, p2)
    }
  } else {
    plot <- p
  }

  return(list("plot" = plot, "x" = x, "cor" = cor))
}
|
#' helper function to run models in frem
#'
#' Fits one fully-random-effects lmer model per timepoint x phenotype and
#' returns, per fit, the proportion of variance attributed to each design
#' term plus the unexplained fraction, the timepoint, and a singular-fit flag.
#' @keywords internal
#' @noRd

.partitionVarianceFrem <- function(dat, timeCol, phenotypes, ind_fmla, ext, des, ...) {
  # Rows are built as (variance proportions..., unexplained, time, singular);
  # the ext = FALSE layout (two design variables) adds an interaction column.
  H2 <- data.frame(do.call(rbind, lapply(sort(unique(dat[[timeCol]])), function(tm) {
    sub <- dat[dat[[timeCol]] == tm, ]
    do.call(rbind, lapply(phenotypes, function(e) {
      # Build "as.numeric(pheno) ~ (1|des1) + ..." from the precomputed RHS.
      fmla <- as.formula(paste0("as.numeric(", e, ") ~ ", ind_fmla))
      model <- suppressMessages(lme4::lmer(fmla, data = sub, ...))
      # Singular fits are reported through optinfo convergence messages.
      if (length(model@optinfo$conv$lme4) >= 1) {
        singular <- any(grepl("isSingular", model@optinfo$conv$lme4$messages))
      } else {
        singular <- FALSE
      }
      # re: random-effect variance components; res: residual variance
      # (VarCorr's "sc" attribute is the residual std dev).
      re <- lme4::VarCorr(model)
      res <- attr(lme4::VarCorr(model), "sc")^2

      if (!ext) {
        # Two design variables: pull each main effect and the interaction
        # term (the component whose name contains ":").
        interaction.var <- as.numeric(attr(re[[which(grepl(":", names(re)))]], "stddev"))^2
        des1.var <- as.numeric(attr(re[[des[1]]], "stddev"))^2
        des2.var <- as.numeric(attr(re[[des[2]]], "stddev"))^2

        tot.var <- sum(as.numeric(re), res)
        unexp <- 1 - sum(as.numeric(re)) / sum(as.numeric(re), res)

        h2 <- c(
          (des1.var / tot.var),
          (des2.var / tot.var),
          (interaction.var / tot.var),
          unexp,
          tm,
          singular
        )
      } else {
        # General case: one variance component per design variable, in order.
        var <- lapply(des, function(i) {
          as.numeric(attr(re[[i]], "stddev"))^2
        })

        tot.var <- sum(as.numeric(re), res)
        unexp <- 1 - sum(as.numeric(re)) / sum(as.numeric(re), res)

        h2 <- c(unlist(var) / tot.var, unexp, tm, singular)
      }
      return(h2)
    }))
  })))
  # Rows cycle phenotype-fastest within each timepoint, matching the lapply order.
  H2$Phenotypes <- rep(phenotypes, length.out = nrow(H2))
  return(H2)
}
#' @description
#' Internal function for calculating \alpha and \beta of a distribution represented by multi value
#' traits.
#' @param s1 A data.frame or matrix of multi value traits. The column names should include a number
#' between 0.0001 and 0.9999 representing the "bin".
#' @param priors Optional list with elements `a` and `b` giving prior Beta shape parameters;
#'   defaults to a = 0.5, b = 0.5 when NULL.
#' @param plot Logical, should data for plotting be kept in the output?
#' @param support Numeric vector of support points at which the posterior density is evaluated.
#' @param cred.int.level Credible interval level (e.g. 0.9) for the highest density interval.
#' @param calculatingSupport Logical; if TRUE (and support is NULL) only the support range is returned.
#' @examples
#'
#' mv_beta <- mvSim(
#'   dists = list(
#'     rbeta = list(shape1 = 5, shape2 = 8)
#'   ),
#'   n_samples = c(30)
#' )
#' .conj_beta_mv(
#'   s1 = mv_beta[1:30, -1], priors = list(a = c(0.5), b = c(0.5)),
#'   cred.int.level = 0.9,
#'   plot = TRUE
#' )
#'
#' @keywords internal
#' @noRd
.conj_beta_mv <- function(s1 = NULL, priors = NULL,
                          plot = FALSE, support = NULL, cred.int.level = NULL,
                          calculatingSupport = FALSE) {
  #* `make default prior if none provided`
  if (is.null(priors)) {
    priors <- list(a = 0.5, b = 0.5)
  }
  #* `Define dense Support`
  # Early exit used to establish the support range before densities are computed.
  if (is.null(support) && calculatingSupport) {
    return(c(0.0001, 0.9999))
  }
  out <- list()
  #* `Reorder columns if they are not in the numeric order`
  # Strip the non-numeric prefix from the column names to recover each bin's position.
  histColsBin <- as.numeric(sub("[a-zA-Z_.]+", "", colnames(s1)))
  if (any(histColsBin > 1) || any(histColsBin < 0)) {
    stop("Beta Distribution is only defined on [0,1]")
  }
  bins_order <- sort(histColsBin, index.return = TRUE)$ix
  s1 <- s1[, bins_order]

  #* `Turn matrix into a vector`
  # Expand binned counts into a pseudo-sample: each bin value repeated by its
  # (rounded) total count summed over all rows of s1.
  X1 <- rep(histColsBin[bins_order], as.numeric(round(colSums(s1))))

  #* `get parameters for s1 using method of moments``
  #* y ~ Beta(\alpha, \beta)
  #* \alpha ~ \bar{y}( ( (\bar{y} * (1-\bar{y}))/\bar(var) )-1 )
  #* \beta ~ (1-\bar{y})( ( (\bar{y} * (1-\bar{y}))/\bar(var) )-1 )
  mu1 <- mean(X1) #' \bar{y}
  nu1 <- var(X1) / (nrow(s1) - 1) #' \bar{var} the unbiased sample variance
  alpha1 <- mu1 * ((mu1 * (1 - mu1)) / (nu1) - 1)
  beta1 <- (1 - mu1) * ((mu1 * (1 - mu1)) / (nu1) - 1)

  #* `Add priors`
  # Data-derived shape parameters updated by the prior shapes.
  a1_prime <- alpha1 + priors$a[1]
  b1_prime <- beta1 + priors$b[1]

  #* `calculate density`
  dens1 <- dbeta(support, a1_prime, b1_prime)
  pdf1 <- dens1 / sum(dens1) # normalized over the discrete support grid

  #* `calculate highest density interval`
  hdi1 <- qbeta(c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))), a1_prime, b1_prime)

  #* `calculate highest density estimate`
  hde1 <- .betaHDE(a1_prime, b1_prime)

  #* `save summary and parameters`
  out$summary <- data.frame(HDE_1 = hde1, HDI_1_low = hdi1[1], HDI_1_high = hdi1[2])
  out$posterior$a <- a1_prime
  out$posterior$b <- b1_prime
  #* `Make Posterior Draws`
  out$posteriorDraws <- rbeta(10000, a1_prime, b1_prime)
  out$pdf <- pdf1
  #* `keep data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support, "prob" = pdf1,
      "sample" = rep("Sample 1", length(support))
    )
  }

  return(out)
}
|
#' @description
#' Internal function for calculating \alpha and \beta of a distribution represented by single
#' value traits.
#' @param s1 A vector of numerics drawn from a beta distribution.
#' @param priors Optional list with `a` and `b` prior shape parameters (defaults to 0.5 each).
#' @param plot Logical, keep plotting data in the output?
#' @param support Numeric vector over which the posterior density is evaluated.
#' @param cred.int.level Credible interval level for the highest density interval.
#' @param calculatingSupport Logical; if TRUE (and support is NULL) only the support range is returned.
#' @examples
#' .conj_beta_sv(
#'   s1 = rbeta(100, 5, 10),
#'   priors = list(a = c(0.5, 0.5), b = c(0.5, 0.5)),
#'   cred.int.level = 0.9,
#'   plot = FALSE
#' )
#' @keywords internal
#' @noRd
.conj_beta_sv <- function(s1 = NULL, priors = NULL,
                          plot = FALSE, support = NULL, cred.int.level = NULL,
                          calculatingSupport = FALSE) {
  # The Beta distribution only has support on [0, 1].
  if (any(c(s1) > 1) || any(c(s1) < 0)) {
    stop("Beta Distribution is only defined on [0,1]")
  }
  # Default prior when none is supplied.
  if (is.null(priors)) {
    priors <- list(a = 0.5, b = 0.5)
  }
  # Early exit used to establish the support range before densities exist.
  if (is.null(support) && calculatingSupport) {
    return(c(0.0001, 0.9999))
  }
  # Method of moments estimates for the Beta shape parameters:
  #   alpha = ybar * ((ybar * (1 - ybar)) / var - 1)
  #   beta  = (1 - ybar) * ((ybar * (1 - ybar)) / var - 1)
  y_bar <- mean(s1)
  y_var <- var(s1) / (length(s1) - 1)
  shared_term <- (y_bar * (1 - y_bar)) / (y_var) - 1
  shape_a <- y_bar * shared_term
  shape_b <- (1 - y_bar) * shared_term

  # Update the data-derived shapes with the prior shapes.
  post_a <- priors$a[1] + shape_a
  post_b <- priors$b[1] + shape_b

  # Posterior density, normalized over the discrete support grid.
  dens <- dbeta(support, post_a, post_b)
  prob_mass <- dens / sum(dens)

  # Equal-tailed credible interval bounds.
  tail_p <- (1 - cred.int.level) / 2
  cred_bounds <- qbeta(c(tail_p, 1 - tail_p), post_a, post_b)

  # Highest density estimate (mode of the posterior Beta).
  mode_est <- .betaHDE(post_a, post_b)

  # Assemble the output in the same element order used by the other conjugate helpers.
  res <- list()
  res$summary <- data.frame(HDE_1 = mode_est, HDI_1_low = cred_bounds[1], HDI_1_high = cred_bounds[2])
  res$posterior$a <- post_a
  res$posterior$b <- post_b
  res$posteriorDraws <- rbeta(10000, post_a, post_b)
  res$pdf <- prob_mass
  if (plot) {
    res$plot_df <- data.frame(
      "range" = support, "prob" = prob_mass,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(res)
}
|
#' @description
#' Internal function for calculating the HDE (mode) of a beta distribution.
#' The mode is at 0 when alpha <= 1 < beta, at 1 when beta <= 1 < alpha, and at
#' (alpha - 1) / (alpha + beta - 2) otherwise.
#' @param alpha alpha parameter
#' @param beta beta parameter
#' @examples
#' .betaHDE(1, 2)
#' .betaHDE(2, 1)
#' .betaHDE(10, 10)
#' @keywords internal
#' @noRd

.betaHDE <- function(alpha, beta) {
  # Boundary cases: density piles up at an endpoint of [0, 1].
  if (alpha <= 1 && beta > 1) {
    return(0)
  }
  if (alpha > 1 && beta <= 1) {
    return(1)
  }
  # Interior mode of the Beta density.
  (alpha - 1) / (alpha + beta - 2)
}
#' Read in plantCV csv output in wide or long format
#'
#' @param filepath Path to csv file of plantCV output.
#' @param mode NULL (the default) or one of "wide" or "long", partial string matching is supported.
#' This controls whether data is \strong{returned} in long or wide format. If left NULL then
#' the output format will be the same as the input format.
#' @param traitCol Column with phenotype names, defaults to "trait".
#' This should generally not need to be changed from the default. This,
#' labelCol, and valueCol are used to determine if data are in long format in their
#' raw state (the csv file itself).
#' @param labelCol Column with phenotype labels (units), defaults to "label".
#' This should generally not need to be changed from the default.
#' This is used with traitCol when \code{mode="wide"} to identify
#' unique traits since some may be ambiguous
#' (ellipseCenter.x vs ellipseCenter.y, bins of histograms, etc)
#' @param valueCol Column with phenotype values, defaults to "value".
#' This should generally not need to be changed from the default.
#' @param reader The function to use to read in data,
#' defaults to NULL in which case \code{data.table::fread} is used if filters are in place
#' and \code{read.csv} is used otherwise.
#' Note that if you use \code{read.csv} with filters in place then you will need to specify
#' \code{header=FALSE} so that the piped output from awk is read correctly.
#' If fread is too slow for your needs then \code{vroom::vroom()} may be useful.
#' @param filters If a very large pcv output file is read then it may be desireable
#' to subset it before reading it into R, either for ease of use or because of RAM limitations.
#' The filter argument works with "COLUMN in VALUES" syntax. This can either be a character vector
#' or a list of character vectors. In these vectors there needs to be a column name,
#' one of " in ", " is ", or " = " to match the string exactly, or "contains"
#' to match with awk style regex, then a set of comma delimited values to filter
#' that column for (see examples). Note that this and awk both use awk through \code{pipe()}.
#' This functionality will not work on a windows system.
#' @param awk As an alternative to filters a direct call to awk can be supplied here,
#' in which case that call will be used through \code{pipe()}.
#' @param ... Other arguments passed to the reader function.
#' In the case of 'fread' there are several defaults provided already
#' which can be overwritten with these extra arguments.
#'
#' @details
#' In plantCV version 4 the single value traits are returned in wide format from \code{json2csv}
#' and the multi value traits are returned in long format. Briefly plantCV data was returned as one
#' long table which sparked the emphasis in this function on reading data quickly and parsing it
#' outside of R. With the current plantCV output these options are largely unnecessary.
#' When data is read in using read.pcv the traitCol, valueCol, and labelCol arguments are checked
#' to determine if the data is in long format. This is done to keep compatibility with interim
#' versions of plantcv output where all outputs were in a single long format file.
#'
#' With the current implementation and plantcv output you can read wide or long format files into
#' wide or long format in R. Keep in mind that the 'mode' argument controls the format that will be
#' returned in R, not the format that the data saved as in your csv file.
#'
#' @keywords read.csv pcv4
#' @return Returns a data.frame in wide or long format.
#' @importFrom stats as.formula
#' @import data.table
#' @examples
#' \donttest{
#' mv <- paste0(
#'   "https://media.githubusercontent.com/media/joshqsumner/",
#'   "pcvrTestData/main/pcv4-multi-value-traits.csv"
#' )
#' sv <- paste0(
#'   "https://raw.githubusercontent.com/joshqsumner/",
#'   "pcvrTestData/main/pcv4-single-value-traits.csv"
#' )
#'
#' w2w <- read.pcv(sv, mode = "wide", reader = "fread")
#' dim(w2w)
#'
#' w2l <- read.pcv(sv, mode = "long", reader = "fread")
#' dim(w2l)
#'
#' l2w <- read.pcv(mv, mode = "wide", reader = "fread")
#' dim(l2w)
#'
#' l2l <- read.pcv(mv, mode = "long", reader = "fread")
#' dim(l2l)
#' }
#'
#' @export

read.pcv <- function(filepath, mode = NULL,
                     traitCol = "trait", labelCol = "label", valueCol = "value",
                     reader = NULL, filters = NULL, awk = NULL, ...) {
  # No filtering requested: read the whole file directly.
  if (is.null(filters) && is.null(awk)) {
    if (is.null(reader)) {
      reader <- "read.csv"
    }
    # Resolve the reader by name; fread is referenced by namespace so it works
    # whether or not data.table is attached.
    if (reader != "fread") {
      readingFunction <- match.fun(reader)
    } else {
      readingFunction <- data.table::fread
    }
    df1 <- as.data.frame(readingFunction(filepath, ...))
  } else {
    # Filtering requested: subset with awk (via pcv.sub.read) before reading into R.
    if (is.null(reader)) {
      reader <- "fread"
    }
    df1 <- pcv.sub.read(inputFile = filepath, filters = filters, reader = reader, awk = awk, ...)
    # Zero rows almost always means the awk filter matched nothing; surface the
    # generated awk statement to help the user debug their filter.
    if (nrow(df1) < 1) {
      stop(paste0(
        "0 Rows returned using awk statement:\n", awkHelper(filepath, filters),
        "\nMost common issues are misspellings or not including a column name and affector."
      ))
    }
  }
  #* `check original data format`
  checkDataStateRes <- .readpcvCheckDataState(df1, traitCol, valueCol, labelCol, mode)
  startsLong <- checkDataStateRes[["startsLong"]]
  outputMode <- checkDataStateRes[["outputMode"]]
  #* `if data is long and mode is wide`
  out <- .readpcvReshapeData(df1, outputMode, startsLong, traitCol, valueCol, labelCol)
  # Sanitize column names so they are valid/convenient R identifiers.
  colnames(out) <- gsub("/", ".over.", colnames(out))
  colnames(out) <- gsub("\\'", "", colnames(out))
  return(out)
}
|
#' @description
#' Internal function for checking whether input data to read.pcv is in long format
#' and which output format should be produced.
#' @param df1 data.frame as read from disk.
#' @param traitCol,valueCol,labelCol Column names that, when all present, indicate long format.
#' @param mode NULL or one of "wide"/"long" (partial matching allowed).
#' @return list with elements `startsLong` (logical) and `outputMode` ("wide" or "long").
#' @keywords internal
#' @noRd

.readpcvCheckDataState <- function(df1, traitCol, valueCol, labelCol, mode) {
  # Long-format data carries all three of the trait/value/label columns.
  long_cols <- c(traitCol, valueCol, labelCol)
  present <- long_cols %in% colnames(df1)
  if (all(present)) {
    startsLong <- TRUE
  } else if (!any(present)) {
    startsLong <- FALSE
  } else {
    # Partial matches are ambiguous: warn and treat the data as wide.
    found <- c(
      "traitCol",
      "valueCol",
      "labelCol"
    )[which(present)]
    warning(paste0(
      paste(found, collapse = ", "),
      " found in column names of data but either all or none of traitCol,",
      " valueCol, and labelCol are expected. Data will be returned as is."
    ))
    startsLong <- FALSE
  }

  # Default the output mode to the detected input format.
  if (is.null(mode)) {
    outputMode <- if (startsLong) "long" else "wide"
  } else {
    outputMode <- match.arg(mode, c("wide", "long"))
  }
  return(list(
    "startsLong" = startsLong,
    "outputMode" = outputMode
  ))
}
|
#' @description
#' Internal function for reshaping data in read.pcv between wide and long formats.
#' @param df1 data.frame as read from disk.
#' @param outputMode "wide" or "long"; the format to return.
#' @param startsLong Logical, whether df1 is currently in long format.
#' @param traitCol,valueCol,labelCol Long-format column names (see read.pcv).
#' @return data.frame in the requested format.
#' @keywords internal
#' @noRd

.readpcvReshapeData <- function(df1, outputMode, startsLong, traitCol, valueCol, labelCol) {
  if (startsLong) {
    #* `if data is long and mode is wide`
    if (outputMode == "wide") {
      long <- df1
      # Drop a leading unnamed index column ("X" with all-unique values),
      # e.g. a row-number column written by a previous read/write cycle.
      if (substr(colnames(long)[1], 1, 1) == "X" && length(unique(long[[1]])) == nrow(long)) {
        long <- long[, -1]
      }
      long <- long[!is.na(long[[valueCol]]), ]
      # NA labels would break the dcast formula; use a "none" placeholder
      # that is stripped from the resulting column names below.
      long[[labelCol]] <- ifelse(is.na(long[[labelCol]]), "none", long[[labelCol]])
      wide <- as.data.frame(data.table::dcast(data.table::as.data.table(long),
        as.formula(paste0("... ~ ", traitCol, "+", labelCol)),
        value.var = valueCol, sep = "."
      ))
      colnames(wide) <- sub(".none$", "", colnames(wide))
      if (any(grepl("hist|frequencies", colnames(wide)))) { # reorder the MV traits by their bins
        #* get a list of the unique non-numeric parts
        histCols <- colnames(wide)[grepl("hist|frequencies", colnames(wide))]
        uniqueMvTraits <- unique(gsub("[.]+$", "", gsub("[0-9]+", "", histCols)))
        #* for each unique non-numeric part, sort the names
        # Numeric sort of the bin suffixes (alphabetic order would put e.g. 10 before 2).
        mvColsReordered <- unlist(lapply(uniqueMvTraits, function(umt) {
          iterCols <- histCols[grepl(umt, histCols)]
          iterColsNumeric <- as.numeric(gsub(paste0(umt, "."), "", iterCols))
          bins_order <- sort(iterColsNumeric, index.return = TRUE)$ix
          iterCols[bins_order]
        }))
        #* combine the histCols and the other columns, in the new order.
        sv_and_meta_cols <- colnames(wide)[!grepl("hist|frequencies", colnames(wide))]
        wide <- wide[, c(sv_and_meta_cols, mvColsReordered)]
      }
      out <- wide

      #* `if data is long and mode is long`
    } else if (outputMode == "long") {
      out <- df1
      # Same trait-name sanitization applied to wide column names in read.pcv.
      out[[traitCol]] <- gsub("/", ".over.", out[[traitCol]])
      out[[traitCol]] <- gsub("\\'", "", out[[traitCol]])
      #* `if data is wide and mode is wide (single value traits only)`
    }
  } else {
    if (outputMode == "wide") {
      out <- df1
      #* `if data is wide and mode is long (single value traits only)`
    } else if (outputMode == "long") {
      #* ***** `find phenotype columns as section of numerics at end of data`
      # Phenotype columns are assumed to be the unbroken run of numeric columns
      # at the END of the data; walk backwards and keep positions while the
      # reversed column sequence still lines up with the numeric columns.
      sequence <- seq(ncol(df1), 1, -1)
      numeric_cols <- as.numeric(which(unlist(lapply(df1, is.numeric))))
      pheno_position_in_seq <- which(unlist(lapply(seq_along(numeric_cols), function(i) {
        sequence[i] == rev(numeric_cols)[i]
      })))
      pheno_cols <- rev(sequence[pheno_position_in_seq])
      #* ***** `melt data`
      #* note this will warn about numeric vs integer
      #* so I am suppressing that since it should always be fine to do that.
      out <- suppressWarnings(
        as.data.frame(
          data.table::melt(
            data.table::as.data.table(df1),
            measure.vars = pheno_cols, variable.name = traitCol,
            value.name = valueCol
          )
        )
      )
    }
  }
  return(out)
}
#' Function to visualize common \code{nlme::nlme} growth models.
#'
#' Models fit using \link{growthSS} inputs by \link{fitGrowth}
#' (and similar models made through other means)
#' can be visualized easily using this function. This will generally be called by \code{growthPlot}.
#'
#' @param fit A model fit returned by \code{fitGrowth} with type="nlme".
#' @param form A formula similar to that in \code{growthSS} inputs (or the \code{pcvrForm} part of the
#' output) specifying the outcome, predictor, and grouping structure of the data as
#' \code{outcome ~ predictor|individual/group}
#' @param df A dataframe to use in plotting observed growth curves on top of the model.
#' This must be supplied for nlme models.
#' @param groups An optional set of groups to keep in the plot.
#' Defaults to NULL in which case all groups in the model are plotted.
#' @param timeRange An optional range of times to use. This can be used to view predictions for
#' future data if the avaiable data has not reached some point (such as asymptotic size).
#' @param facetGroups logical, should groups be separated in facets? Defaults to TRUE.
#' @param groupFill logical, should groups have different colors? Defaults to FALSE. If TRUE then
#' viridis colormaps are used in the order of virMaps.
#' @param virMaps order of viridis maps to use. Will be recycled to necessary length.
#' Defaults to "plasma", but will generally be informed by growthPlot's default.
#' @keywords growth-curve
#' @importFrom methods is
#' @import ggplot2
#' @importFrom stats predict update residuals
#' @importFrom nlme nlme nlme.formula
#' @examples
#'
#' simdf <- growthSim("logistic",
#'   n = 10, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#'
#' ss <- growthSS(
#'   model = "logistic", form = y ~ time | id / group, sigma = "none",
#'   df = simdf, start = NULL, type = "nlme"
#' )
#'
#' fit <- fitGrowth(ss)
#'
#' nlmePlot(fit, form = ss$pcvrForm, groups = NULL, df = ss$df, timeRange = NULL)
#' nlmePlot(fit, form = ss$pcvrForm, groups = "a", df = ss$df, timeRange = 1:10, groupFill = TRUE)
#'
#' @return Returns a ggplot showing an nlme model's credible
#' intervals and optionally the individual growth lines.
#'
#' @export
#'

nlmePlot <- function(fit, form, df = NULL, groups = NULL, timeRange = NULL, facetGroups = TRUE,
                     groupFill = FALSE, virMaps = c("plasma")) {
  #* `get needed information from formula`
  parsed_form <- .parsePcvrForm(form, df)
  y <- parsed_form$y
  x <- parsed_form$x
  individual <- parsed_form$individual
  # "dummyIndividual" is the parser's placeholder when no individual term was given.
  if (individual == "dummyIndividual") {
    individual <- NULL
  }
  group <- parsed_form$group
  df <- parsed_form$data
  #* `filter by groups if groups != NULL`
  if (!is.null(groups)) {
    df <- df[df[[group]] %in% groups, ]
  }
  # Combined group+individual identifier used for per-curve prediction data.
  intVar <- paste0(group, individual)
  #* `make new data if timerange is not NULL`
  if (!is.null(timeRange)) {
    # One row per (group.individual, time) combination over the requested range.
    new_data <- do.call(rbind, lapply(unique(df[[intVar]]), function(g) {
      stats::setNames(data.frame(g, timeRange), c(intVar, x))
    }))
    # Split the combined id back apart; assumes it is "group.individual" — the
    # "." separator comes from how intVar values are formed upstream.
    new_data[[group]] <- gsub("[.].*", "", new_data[[intVar]])
    new_data[[individual]] <- gsub(".*[.]", "", new_data[[intVar]])
    # Restrict observed data to the plotted time window.
    df <- df[df[[x]] >= min(timeRange) & df[[x]] <= max(timeRange), ]
  } else {
    new_data <- df
  }

  preds <- new_data
  preds$trendline <- round(predict(fit, preds), 4)
  # Deduplicate on the rounded trendline so each curve is drawn once per group.
  preds <- preds[!duplicated(preds$trendline), ]
  preds <- .add_sigma_bounds(preds, fit, x, group)

  #* `plot`

  #* `facetGroups`
  facet_layer <- NULL
  if (facetGroups) {
    facet_layer <- ggplot2::facet_wrap(stats::as.formula(paste0("~", group)))
  }
  #* `groupFill`
  # Default: every group uses the same plasma ribbon/line pair.
  pal <- viridis::plasma(2, begin = 0.1, end = 0.9)
  virList <- lapply(seq_along(unique(df[[group]])), function(i) {
    pal
  })
  if (groupFill) {
    # One viridis map per group, recycling virMaps as needed.
    virList <- lapply(rep(virMaps, length.out = length(unique(df[[group]]))), function(pal) {
      viridis::viridis(2, begin = 0.1, end = 0.9, option = pal)
    })
  }
  #* `layer for individual lines if formula was complete`
  individual_lines <- list()
  if (!is.null(individual)) {
    individual_lines <- ggplot2::geom_line(
      data = df, ggplot2::aes(
        x = .data[[x]], y = .data[[y]],
        group = interaction(
          .data[[individual]],
          .data[[group]]
        )
      ),
      linewidth = 0.25, color = "gray40"
    )
  }

  plot <- ggplot2::ggplot(preds, ggplot2::aes(x = .data[[x]], y = .data[["trendline"]])) +
    facet_layer +
    individual_lines +
    ggplot2::labs(x = x, y = y) +
    pcv_theme()

  # Add one ribbon + trend line per group so each can use its own palette entry.
  for (g in seq_along(unique(preds[[group]]))) {
    iteration_group <- unique(preds[[group]])[g]
    sub <- preds[preds[[group]] == iteration_group, ]
    plot <- plot +
      ggplot2::geom_ribbon(
        data = sub, ggplot2::aes(
          ymin = .data[["sigma_ymin"]],
          ymax = .data[["sigma_ymax"]]
        ),
        fill = virList[[g]][1], alpha = 0.5
      ) +
      ggplot2::geom_line(data = sub, color = virList[[g]][2], linewidth = 0.75)
  }

  return(plot)
}
|
#' convenience function for calculating sigma upper and lower bounds
#'
#' Adds `sigma_ymin`/`sigma_ymax` columns to `preds` based on the fitted model's
#' variance structure. Supports nlme's varPower, varExp, and varIdent structures;
#' any other (or absent) variance structure falls back to a constant band of
#' width `fit$sigma` around the trendline.
#'
#' @param preds data.frame of predictions containing `trendline`, the x column, and the group column.
#' @param fit A fitted nlme model (or compatible list with `modelStruct$varStruct` and `sigma`).
#' @param x Name of the time/predictor column in `preds`.
#' @param group Name of the grouping column in `preds`.
#' @return `preds` with `sigma_ymin` and `sigma_ymax` columns added, row-bound per group.
#' @keywords internal
#' @noRd
.add_sigma_bounds <- function(preds, fit, x, group) {
  res <- do.call(rbind, lapply(unique(preds[[group]]), function(grp) {
    # Per-group variance-function coefficient (numeric(0) when there is no varStruct).
    varCoef <- as.numeric(fit$modelStruct$varStruct[grp])

    sub <- preds[preds[[group]] == grp, ]
    exes <- sub[[x]]

    if (methods::is(fit$modelStruct$varStruct, "varPower")) {
      out <- exes^(2 * varCoef)
    } else if (methods::is(fit$modelStruct$varStruct, "varExp")) {
      out <- exp(2 * varCoef * exes)
    } else if (methods::is(fit$modelStruct$varStruct, "varIdent")) {
      baseSigma <- fit$sigma
      varSummary <- summary(fit$modelStruct$varStruct)
      # varIdent stores per-group weights; 1/weight scales the residual SD per group.
      coefs <- data.frame(
        x = 1 / unique(attr(varSummary, "weight")),
        g = unique(attr(varSummary, "groups"))
      )
      out <- baseSigma * coefs[coefs$g == grp, "x"]
    } else {
      # Previously `out` was left undefined here, so models fit without a
      # recognized variance structure (e.g. sigma = "none") errored with
      # "object 'out' not found". Fall back to a constant residual SD band.
      out <- fit$sigma
    }

    sub$sigma_ymax <- sub$trendline + 0.5 * out
    sub$sigma_ymin <- sub$trendline - 0.5 * out
    return(sub)
  }))

  return(res)
}
|
#' alias of nlmePlot for using lme models via class matching
#'
#' Thin wrapper that forwards every argument unchanged to \code{nlmePlot}.
#' @keywords internal
#' @noRd

lmePlot <- function(fit, form, df = NULL, groups = NULL, timeRange = NULL, facetGroups = TRUE,
                    groupFill = FALSE, virMaps = c("plasma")) {
  # Delegate with named arguments for clarity; behavior is identical to nlmePlot.
  nlmePlot(
    fit = fit, form = form, df = df, groups = groups, timeRange = timeRange,
    facetGroups = facetGroups, groupFill = groupFill, virMaps = virMaps
  )
}
#' Ease of use wrapper function for fitting various growth models specified by \link{growthSS}
#'
#' @param ss A list generated by \code{growthSS}.
#' @param ... Additional arguments passed to model fitting functions determined by \code{ss$type}.
#' For type = "nlme" these are passed to \code{nlme::nlmeControl}, not \code{nlme::nlme}.
#' Additional arguments are documented in \link{fitGrowthbrms}, \link{fitGrowthnlme},
#' \link{fitGrowthnls}, \link{fitGrowthnlrq}, \link{fitGrowthmgcvgam}, \link{fitGrowthsurvreg},
#' \link{fitGrowthflexsurv}.
#' @keywords Bayesian brms nlme nls nlrq
#' @return A fit model from the selected type.
#'
#' @examples
#'
#' simdf <- growthSim("logistic",
#'   n = 20, t = 25,
#'   params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
#' )
#' ss <- growthSS(
#'   model = "logistic", form = y ~ time | group,
#'   df = simdf, type = "nls"
#' )
#' fitGrowth(ss)
#' ss <- growthSS(
#'   model = "gam", form = y ~ time | group,
#'   df = simdf, type = "nls"
#' )
#' fitGrowth(ss)
#'
#' @export

fitGrowth <- function(ss, ...) {
  # gam models are handled by dedicated helpers named fitGrowth<type>gam.
  suffix <- if (ss$model == "gam") "gam" else NULL
  # Dispatch by name, e.g. "fitGrowthnls" or "fitGrowthnlmegam".
  fitter <- match.fun(paste0("fitGrowth", ss$type, suffix))
  fitter(ss, ...)
}
|
#' Ease of use brms wrapper function for fitting growth models specified by \code{growthSS}
#'
#' Helper function generally called from \link{fitGrowth}.
#'
#' @param ss A list generated by \code{growthSS}.
#' @param iter A number of iterations to sample for each chain.
#' By default half this length is taken as warm-up for the MCMC algorithm.
#' This defaults to 2000.
#' @param cores A number of cores to run in parallel.
#' This defaults to 1 if the "mc.cores" option is not set.
#' Generally this is specified as one core per chain so that the model is fit in parallel.
#' @param chains A number of markov chains to use, this defaults to 4.
#' @param prior A \code{brmsprior} object if \code{growthSS} did not have priors specified.
#' If left NULL (the default) and ss does not contain priors then a warning is
#' issued but the model will still attempt to fit.
#' @param backend A backend for brms to use Stan through.
#' This defaults to use "cmdstanr".
#' @param silent Passed to \code{brms::brm} to control verbosity.
#' This defaults to 0, the most verbose option so that messages and progress are printed.
#' @param ... Additional arguments passed to \code{brms::brm}.
#' @keywords Bayesian brms
#' @return A \code{brmsfit} object, see \code{?`brmsfit-class`} for details.
#' @export

fitGrowthbrms <- function(ss, iter = 2000, cores = getOption("mc.cores", 1), chains = 4, prior = NULL,
                          backend = "cmdstanr", silent = 0, ...) {
  # Prefer a prior stored in ss; otherwise warn if none was supplied at all.
  ss_has_prior <- "prior" %in% names(ss)
  if (ss_has_prior) {
    prior <- ss$prior
  } else if (is.null(prior)) {
    warning(
      paste0(
        "No prior was specified. Flat priors will be used, this is likely to cause problems in model ",
        "fitting and yield less accurate results. If you are fitting a gam then ignore this warning."
      )
    )
  }
  # Fit the model; ss carries the formula, data, family, and init function.
  fitted_model <- brms::brm(
    formula = ss$formula, prior = prior, data = ss$df, family = ss$family,
    iter = iter, init = ss$initfun, cores = cores, chains = chains,
    backend = backend, silent = silent, ...
  )
  return(fitted_model)
}
|
| 88 | ||
| 89 |
#' Ease of use nlme wrapper function for fitting growth models specified by \code{growthSS}
|
|
| 90 |
#' |
|
| 91 |
#' Helper function generally called from \link{fitGrowth}.
|
|
| 92 |
#' |
|
| 93 |
#' @param ss A list generated by \code{growthSS}.
|
|
| 94 |
#' @param ... Additional arguments passed to \code{nlme::nlmeControl}.
|
|
| 95 |
#' @keywords nlme |
|
| 96 |
#' @return An \code{nlme} object, see \code{?nlme} for details.
|
|
| 97 |
#' @importFrom nlme nlme |
|
| 98 |
#' @export |
|
| 99 | ||
| 100 |
fitGrowthnlme <- function(ss, ...) {
  # Assemble the nlme call from the pieces prepared by growthSS. `data` is
  # quoted so the stored call references ss[["df"]] rather than embedding the
  # entire dataframe in the fit object.
  nlme_args <- list(
    model = ss[["formula"]][["model"]],
    data = quote(ss[["df"]]),
    fixed = ss[["formula"]][["fixed"]],
    random = ss[["formula"]][["random"]],
    groups = ss[["formula"]][["groups"]],
    weights = ss[["formula"]][["weights"]],
    correlation = ss[["formula"]][["cor_form"]],
    start = ss[["start"]],
    # returnObject = TRUE keeps the fitted object even when convergence
    # criteria are not strictly met.
    control = nlme::nlmeControl(returnObject = TRUE, ...)
  )
  model_fit <- do.call(nlme, nlme_args)
  return(model_fit)
}
|
| 116 | ||
| 117 |
#' Ease of use lme wrapper function for fitting gams specified by \code{growthSS}
|
|
| 118 |
#' |
|
| 119 |
#' Helper function generally called from \link{fitGrowth}.
|
|
| 120 |
#' |
|
| 121 |
#' @param ss A list generated by \code{growthSS}.
|
|
| 122 |
#' @param ... Additional arguments passed to \code{nlme::lmeControl}.
|
|
| 123 |
#' @keywords nlme gam |
|
| 124 |
#' @return An \code{lme} object, see \code{?lme} for details.
|
|
| 125 |
#' @importFrom nlme lme |
|
| 126 |
#' @export |
|
| 127 | ||
| 128 |
fitGrowthnlmegam <- function(ss, ...) {
  # gam-style models go through lme (not nlme): growthSS builds the smooth
  # terms into the fixed-effect formula. `data` is quoted so the stored call
  # references ss[["df"]] instead of embedding the dataframe.
  lme_args <- list(
    fixed = ss[["formula"]][["model"]],
    data = quote(ss[["df"]]),
    random = ss[["formula"]][["random"]],
    weights = ss[["formula"]][["weights"]],
    correlation = ss[["formula"]][["cor_form"]],
    # returnObject = TRUE keeps the fit even on incomplete convergence.
    control = nlme::lmeControl(returnObject = TRUE, ...)
  )
  return(do.call(lme, lme_args))
}
|
| 141 | ||
| 142 | ||
| 143 |
#' Ease of use nls wrapper function for fitting growth models specified by \code{growthSS}
|
|
| 144 |
#' |
|
| 145 |
#' Helper function generally called from \link{fitGrowth}.
|
|
| 146 |
#' |
|
| 147 |
#' @param ss A list generated by \code{growthSS}.
|
|
| 148 |
#' @param ... Additional arguments passed to \code{stats::nls}.
|
|
| 149 |
#' @keywords nls |
|
| 150 |
#' @return An \code{nls} object, see \code{?nls} for details.
|
|
| 151 |
#' @importFrom stats nls |
|
| 152 |
#' @export |
|
| 153 | ||
| 154 |
fitGrowthnls <- function(ss, ...) {
  # Nonlinear least squares fit from the formula, data, and starting values
  # assembled by growthSS; extra arguments go to stats::nls.
  model_fit <- stats::nls(
    formula = ss[["formula"]],
    data = ss[["df"]],
    start = ss[["start"]],
    ...
  )
  return(model_fit)
}
|
| 162 | ||
| 163 |
#' Ease of use lm wrapper function for fitting gams specified by \code{growthSS}
|
|
| 164 |
#' |
|
| 165 |
#' Helper function generally called from \link{fitGrowth}.
|
|
| 166 |
#' |
|
| 167 |
#' @param ss A list generated by \code{growthSS}.
|
|
| 168 |
#' @param ... Additional arguments passed to \code{stats::lm}.
|
|
| 169 |
#' @keywords nls |
|
| 170 |
#' @return An \code{lm} object, see \code{?lm} for details.
|
|
| 171 |
#' @importFrom stats lm |
|
| 172 |
#' @export |
|
| 173 | ||
| 174 |
fitGrowthnlsgam <- function(ss, ...) {
  # gam-style growth models reduce to a (possibly weighted) linear model on
  # the basis expansion that growthSS placed in the formula.
  model_fit <- stats::lm(
    formula = ss[["formula"]],
    data = ss[["df"]],
    weights = ss[["weights"]],
    ...
  )
  return(model_fit)
}
|
| 182 | ||
| 183 |
#' Ease of use lm wrapper function for fitting growth models specified by \code{mvSS}
|
|
| 184 |
#' |
|
| 185 |
#' Helper function generally called from \link{fitGrowth}.
|
|
| 186 |
#' |
|
| 187 |
#' @param ss A list generated by \code{mvSS}.
|
|
| 188 |
#' @param ... Additional arguments passed to \code{stats::lm}.
|
|
| 189 |
#' @keywords nls |
|
| 190 |
#' @return An \code{lm} object, see \code{?lm} for details.
|
|
| 191 |
#' @importFrom stats lm |
|
| 192 |
#' @export |
|
| 193 | ||
| 194 |
fitGrowthlm <- function(ss, ...) {
  # Fit through do.call so that `data` stays quoted (the stored call then
  # references ss[["df"]] rather than the embedded dataframe).
  model_fit <- do.call("lm", args = list(
    formula = ss[["formula"]],
    data = quote(ss[["df"]]),
    weights = ss[["weights"]], ...
  ))
  # The recorded call embeds the full deparsed weights vector as its 4th
  # element; swap that element for the symbol "weights" so the call prints
  # compactly.
  call_chr <- as.character(model_fit$call)
  call_chr[4] <- "weights"
  model_fit$call <- as.call(str2expression(call_chr))
  return(model_fit)
}
|
| 205 | ||
| 206 |
#' Ease of use nlrq wrapper function for fitting growth models specified by \code{growthSS}
|
|
| 207 |
#' |
|
| 208 |
#' Helper function generally called from \link{fitGrowth}.
|
|
| 209 |
#' |
|
| 210 |
#' @param ss A list generated by \code{growthSS}.
|
|
| 211 |
#' @param cores Optionally specify how many cores to run in parallel if ss$taus is >1L. |
|
| 212 |
#' Defaults to 1 if mc.cores option is not set globally. |
|
| 213 |
#' @param ... Additional arguments passed to \code{quantreg::nlrq}.
|
|
| 214 |
#' @keywords nls |
|
| 215 |
#' @return An \code{nlrqModel} object if tau is length of 1 or a list of such models for multiple taus,
|
|
| 216 |
#' see \code{?quantreg::nlrq} or \code{?nls::nlsModel} for details.
|
|
| 217 |
#' @importFrom parallel mclapply |
|
| 218 |
#' @importFrom quantreg nlrq |
|
| 219 |
#' @export |
|
| 220 | ||
| 221 |
fitGrowthnlrq <- function(ss, cores = getOption("mc.cores", 1), ...) {
  # Fit a single nonlinear quantile regression at one tau. `data` is quoted
  # so the stored call references ss[["df"]].
  fit_single_tau <- function(tau_i) {
    do.call("nlrq", args = list(
      formula = ss[["formula"]],
      data = quote(ss[["df"]]),
      tau = tau_i,
      start = ss[["start"]], ...
    ))
  }
  taus <- ss[["taus"]]
  if (length(taus) > 1) {
    # Several quantiles: fit in parallel and return a list named by tau.
    fits <- parallel::mclapply(taus, fit_single_tau, mc.cores = cores)
    names(fits) <- taus
  } else {
    # One quantile: return the bare model object.
    fits <- fit_single_tau(taus)
  }
  return(fits)
}
|
| 243 | ||
| 244 |
#' Ease of use rq wrapper function for fitting gams specified by \code{growthSS}
|
|
| 245 |
#' |
|
| 246 |
#' Helper function generally called from \link{fitGrowth}.
|
|
| 247 |
#' |
|
| 248 |
#' @param ss A list generated by \code{growthSS}.
|
|
| 249 |
#' @param cores number of cores to run in parallel |
|
| 250 |
#' @param ... Additional arguments passed to \code{quantreg::rq}.
|
|
| 251 |
#' @keywords nls |
|
| 252 |
#' @return An \code{rq} object, see \code{?rq} for details.
|
|
| 253 |
#' @importFrom quantreg rq |
|
| 254 |
#' @export |
|
| 255 | ||
| 256 |
fitGrowthnlrqgam <- function(ss, cores = getOption("mc.cores", 1), ...) {
  # gam-style quantile models use linear rq on the basis expansion that
  # growthSS embedded in the formula. `data` is quoted so the stored call
  # references ss[["df"]].
  fit_single_tau <- function(tau_i) {
    do.call("rq", args = list(
      formula = ss[["formula"]],
      data = quote(ss[["df"]]),
      tau = tau_i,
      ...
    ))
  }
  taus <- ss[["taus"]]
  if (length(taus) > 1) {
    # Several quantiles: fit in parallel, return a list named by tau.
    fits <- parallel::mclapply(taus, fit_single_tau, mc.cores = cores)
    names(fits) <- taus
  } else {
    fits <- fit_single_tau(taus)
  }
  return(fits)
}
|
| 278 | ||
| 279 |
#' Ease of use rq wrapper function for fitting models specified by \code{mvSS}
|
|
| 280 |
#' |
|
| 281 |
#' Helper function generally called from \link{fitGrowth}.
|
|
| 282 |
#' |
|
| 283 |
#' @param ss A list generated by \code{mvSS}.
|
|
| 284 |
#' @param cores number of cores to run in parallel |
|
| 285 |
#' @param ... Additional arguments passed to \code{quantreg::rq}.
|
|
| 286 |
#' @keywords nls |
|
| 287 |
#' @return An \code{rq} object, see \code{?rq} for details.
|
|
| 288 |
#' @importFrom quantreg rq |
|
| 289 |
#' @export |
|
| 290 | ||
| 291 |
fitGrowthrq <- function(ss, cores = getOption("mc.cores", 1), ...) {
  # Fit one quantile regression for a single tau, then tidy the stored call
  # so the embedded weights vector prints as the symbol `weights`.
  fit_single_tau <- function(tau_i) {
    fit <- do.call("rq", args = list(
      formula = ss[["formula"]],
      data = quote(ss[["df"]]),
      weights = ss[["weights"]],
      tau = tau_i,
      ...
    ))
    ocall <- as.character(fit$call)
    # The deparsed call is c("rq", formula, data, weights, tau, ...): the
    # weights vector is element 4. The previous code replaced element 5,
    # which clobbered tau in the stored call (compare fitGrowthlm, which
    # correctly replaces element 4 of its lm call).
    ocall[4] <- "weights"
    fit$call <- as.call(str2expression(ocall))
    return(fit)
  }
  taus <- ss[["taus"]]
  if (length(taus) > 1) {
    # Several quantiles: fit in parallel, return a list named by tau.
    fits <- parallel::mclapply(taus, fit_single_tau, mc.cores = cores)
    names(fits) <- taus
  } else {
    fits <- fit_single_tau(taus)
  }
  return(fits)
}
|
| 321 | ||
| 322 |
#' Ease of use mgcv wrapper function for fitting gams specified by \code{growthSS}
|
|
| 323 |
#' |
|
| 324 |
#' Helper function generally called from \link{fitGrowth}.
|
|
| 325 |
#' |
|
| 326 |
#' @param ss A list generated by \code{growthSS}.
|
|
| 327 |
#' @param ... Additional arguments passed to \code{mgcv::gam}.
|
|
| 328 |
#' @keywords mgcv gam |
|
| 329 |
#' @return An \code{gam} object, see \code{?gam} for details.
|
|
| 330 |
#' @importFrom mgcv gam s |
|
| 331 |
#' @export |
|
| 332 | ||
| 333 |
fitGrowthmgcvgam <- function(ss, ...) {
  # mgcv handles smooth construction itself; growthSS already placed the s()
  # terms in the formula, so this is a thin pass-through.
  gam_args <- list(
    formula = ss[["formula"]],
    data = ss[["df"]],
    ...
  )
  return(do.call("gam", args = gam_args))
}
|
| 341 | ||
| 342 | ||
| 343 |
#' Ease of use wrapper function for fitting growth models specified by \code{growthSS}
|
|
| 344 |
#' |
|
| 345 |
#' Helper function generally called from \link{fitGrowth}.
|
|
| 346 |
#' |
|
| 347 |
#' @param ss A list generated by \code{growthSS}.
|
|
| 348 |
#' @param ... Additional arguments passed to \code{survival::survreg}.
|
|
| 349 |
#' @keywords survival
|
| 350 |
#' @return A \code{survreg} object.
|
|
| 351 |
#' @importFrom survival survreg Surv |
|
| 352 |
#' @export |
|
| 353 | ||
| 354 |
fitGrowthsurvreg <- function(ss, ...) {
  # Parametric survival regression; the Surv() outcome and the distribution
  # were prepared by growthSS. `data` is quoted so the stored call references
  # ss[["df"]].
  surv_args <- list(
    formula = ss[["formula"]],
    data = quote(ss[["df"]]),
    dist = ss[["distribution"]],
    ...
  )
  return(do.call("survreg", args = surv_args))
}
|
| 363 | ||
| 364 |
#' Ease of use wrapper function for fitting growth models specified by \code{growthSS}
|
|
| 365 |
#' |
|
| 366 |
#' Helper function generally called from \link{fitGrowth}.
|
|
| 367 |
#' |
|
| 368 |
#' @param ss A list generated by \code{growthSS}.
|
|
| 369 |
#' @param ... Additional arguments passed to \code{flexsurv::flexsurvreg}.
|
|
| 370 |
#' @keywords flexsurv |
|
| 371 |
#' @return A \code{survreg} object.
|
|
| 372 |
#' @export |
|
| 373 | ||
| 374 |
fitGrowthflexsurv <- function(ss, ...) {
  # flexsurv is an optional dependency, so the fitting function is looked up
  # at run time rather than imported. getExportedValue() replaces the prior
  # eval(parse(text = "flexsurv::flexsurvreg")) construction, and the
  # requireNamespace() guard gives a clear error when the package is missing.
  if (!requireNamespace("flexsurv", quietly = TRUE)) {
    stop("Package 'flexsurv' is required to fit this model, please install it.")
  }
  fit <- do.call(getExportedValue("flexsurv", "flexsurvreg"), args = list(
    formula = ss[["formula"]][["f1"]],
    anc = ss[["formula"]][["f2"]],
    data = quote(ss[["df"]]),
    dist = ss[["distribution"]],
    ...
  ))
  return(fit)
}
| 1 |
#' Function to visualize common \code{quantreg::nlrq} growth models.
|
|
| 2 |
#' |
|
| 3 |
#' Models fit using \link{growthSS} inputs by \link{fitGrowth}
|
|
| 4 |
#' (and similar models made through other means) |
|
| 5 |
#' can be visualized easily using this function. This will generally be called by \code{growthPlot}.
|
|
| 6 |
#' |
|
| 7 |
#' @param fit A model fit, or list of model fits, returned by \code{fitGrowth} with type="nlrq".
|
|
| 8 |
#' @param form A formula similar to that in \code{growthSS} inputs (or the \code{pcvrForm} part of the
|
|
| 9 |
#' output) specifying the outcome, predictor, and grouping structure of the data as |
|
| 10 |
#' \code{outcome ~ predictor|individual/group}. If the individual and group are specified then the
|
|
| 11 |
#' observed growth lines are plotted. |
|
| 12 |
#' @param df A dataframe to use in plotting observed growth curves on top of the model. |
|
| 13 |
#' This must be supplied for nlrq models. |
|
| 14 |
#' @param groups An optional set of groups to keep in the plot. |
|
| 15 |
#' Defaults to NULL in which case all groups in the model are plotted. |
|
| 16 |
#' @param timeRange An optional range of times to use. This can be used to view predictions for |
|
| 17 |
#' future data if the available data has not reached some point (such as asymptotic size).
|
| 18 |
#' @param facetGroups logical, should groups be separated in facets? Defaults to TRUE. |
|
| 19 |
#' @param groupFill logical, should groups have different colors? Defaults to FALSE. |
|
| 20 |
#' If TRUE then viridis colormaps are used in the order of virMaps |
|
| 21 |
#' @param virMaps order of viridis maps to use. Will be recycled to necessary length. |
|
| 22 |
#' Defaults to "plasma", but will generally be informed by growthPlot's default. |
|
| 23 |
#' @keywords growth-curve |
|
| 24 |
#' @importFrom methods is |
|
| 25 |
#' @import ggplot2 |
|
| 26 |
#' @importFrom stats setNames predict |
|
| 27 |
#' @importFrom viridis plasma |
|
| 28 |
#' @examples |
|
| 29 |
#' |
|
| 30 |
#' simdf <- growthSim("logistic",
|
|
| 31 |
#' n = 20, t = 25, |
|
| 32 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 33 |
#' ) |
|
| 34 |
#' ss <- growthSS( |
|
| 35 |
#' model = "logistic", form = y ~ time | id / group, |
|
| 36 |
#' tau = c(0.5, 0.9), df = simdf, start = NULL, type = "nlrq" |
|
| 37 |
#' ) |
|
| 38 |
#' fit <- fitGrowth(ss) |
|
| 39 |
#' nlrqPlot(fit, form = ss$pcvrForm, df = ss$df, groups = "a", timeRange = 1:20) |
|
| 40 |
#' nlrqPlot(fit, form = ss$pcvrForm, df = ss$df, groupFill = TRUE, virMaps = c("plasma", "viridis"))
|
|
| 41 |
#' |
|
| 42 |
#' @return Returns a ggplot showing an nlrq model's quantiles |
|
| 43 |
#' and optionally the individual growth lines. |
|
| 44 |
#' |
|
| 45 |
#' @export |
|
| 46 | ||
| 47 |
nlrqPlot <- function(fit, form, df = NULL, groups = NULL, timeRange = NULL,
                     facetGroups = TRUE, groupFill = FALSE, virMaps = c("plasma")) {
  #* `get needed information from formula`
  # .parsePcvrForm supplies the outcome (y), predictor (x), individual, group,
  # and a possibly-augmented copy of the data.
  parsed_form <- .parsePcvrForm(form, df)
  y <- parsed_form$y
  x <- parsed_form$x
  individual <- parsed_form$individual
  # "dummyIndividual" is the parser's placeholder for a formula with no
  # individual term; NULL suppresses the per-individual line layer below.
  if (individual == "dummyIndividual") {
    individual <- NULL
  }
  group <- parsed_form$group
  df <- parsed_form$data
  #* `filter by groups if groups != NULL`
  if (!is.null(groups)) {
    df <- df[df[[group]] %in% groups, ]
  }
  #* `make new data if timerange is not NULL`
  # One prediction grid per group over the requested time range; NULL means
  # predict at the observed data instead.
  if (!is.null(timeRange)) {
    new_data <- do.call(rbind, lapply(unique(df[[group]]), function(g) {
      stats::setNames(data.frame(g, timeRange), c(group, x))
    }))
  } else {
    new_data <- NULL
  }
  #* `standardize fit class`
  # A single nlrq model is wrapped in a list named by its tau so the rest of
  # the function can treat single- and multi-tau fits uniformly.
  if (methods::is(fit, "nlrq")) {
    fit <- list(fit)
    names(fit) <- fit[[1]]$m$tau()
  }
  #* `add predictions`
  # One "Q_<tau>" column of predictions per fitted quantile.
  preds <- do.call(cbind, lapply(fit, function(f) {
    tau <- f$m$tau()
    stats::setNames(data.frame(stats::predict(f, newdata = new_data)), paste0("Q_", tau))
  }))
  predCols <- colnames(preds)
  # Duplicate prediction rows (repeated individuals at the same x) are
  # dropped so each quantile curve is drawn once.
  keep <- which(!duplicated(preds))
  plotdf <- cbind(df[keep, ], preds[keep, ])
  colnames(plotdf) <- c(colnames(df), colnames(preds))

  #* `facetGroups`
  facet_layer <- NULL
  if (facetGroups) {
    facet_layer <- ggplot2::facet_wrap(stats::as.formula(paste0("~", group)))
  }
  #* `groupFill`
  # Build one palette (a color per quantile column) for each group. The
  # palette ramps up then back down so the median quantile is brightest.
  if (groupFill) {
    # Different viridis map per group, recycling virMaps as needed.
    virList <- lapply(rep(virMaps, length.out = length(unique(df[[group]]))), function(pal) {
      virpal_p1 <- viridis::viridis(ceiling(length(predCols) / 2), direction = 1, end = 1, option = pal)
      virpal_p2 <- viridis::viridis(ceiling(length(predCols) / 2),
        direction = -1, end = 1, option = pal
      )[-1]
      c(virpal_p1, virpal_p2)
    })
  } else {
    # Same plasma palette for every group.
    virpal_p1 <- viridis::plasma(ceiling(length(predCols) / 2), direction = 1, end = 1)
    virpal_p2 <- viridis::plasma(ceiling(length(predCols) / 2), direction = -1, end = 1)[-1]
    virpal <- c(virpal_p1, virpal_p2)
    virList <- lapply(seq_along(unique(df[[group]])), function(i) {
      virpal
    })
  }
  #* `layer for individual lines if formula was complete`
  if (!is.null(individual)) {
    individual_lines <- ggplot2::geom_line(
      data = df, ggplot2::aes(
        x = .data[[x]], y = .data[[y]],
        group = interaction(
          .data[[individual]],
          .data[[group]]
        )
      ),
      linewidth = 0.25, color = "gray40"
    )
  } else {
    # An empty list is a no-op when added to a ggplot.
    individual_lines <- list()
  }
  #* `plot`
  # as.character(form)[2] recovers the left-hand side of the formula for the
  # y axis label.
  plot <- ggplot(plotdf, ggplot2::aes(group = interaction(.data[[group]]))) +
    facet_layer +
    individual_lines +
    labs(x = x, y = as.character(form)[2]) +
    pcv_theme()

  # Add each group's quantile curves with that group's palette. Layers are
  # built per group so colors can be set as constants rather than mapped.
  for (g in seq_along(unique(plotdf[[group]]))) {
    iteration_group <- unique(plotdf[[group]])[g]
    sub <- plotdf[plotdf[[group]] == iteration_group, ]
    plot <- plot +
      lapply(seq_along(predCols), function(i) {
        ggplot2::geom_line(
          data = sub, ggplot2::aes(x = .data[[x]], y = .data[[predCols[i]]]),
          color = virList[[g]][i], linewidth = 0.7
        )
      })
  }

  return(plot)
}
| 1 |
#' Reduce phenotypes in longitudinal data to cumulative sums of phenotypes. |
|
| 2 |
#' |
|
| 3 |
#' |
|
| 4 |
#' |
|
| 5 |
#' @description Often in bellwether experiments we are curious about the effect of |
|
| 6 |
#' some treatment vs control. For certain routes in analysing the data this requires |
|
| 7 |
#' considering phenotypes as relative differences compared to a control. |
|
| 8 |
#' |
|
| 9 |
#' @param df Dataframe to use, this can be in long or wide format. |
|
| 10 |
#' @param phenotypes A character vector of column names for the phenotypes |
|
| 11 |
#' that should be compared against control. |
|
| 12 |
#' @param group A character vector of column names that identify groups in the data. |
|
| 13 |
#' Defaults to "barcode". These groups will be calibrated separately, with the exception |
|
| 14 |
#' of the group that identifies a control within the greater hierarchy. |
|
| 15 |
#' @param timeCol Column name to use for time data. |
|
| 16 |
#' @param traitCol Column with phenotype names, defaults to "trait". |
|
| 17 |
#' This should generally not need to be changed from the default. |
|
| 18 |
#' If this and valueCol are present in colnames(df) then the data |
|
| 19 |
#' is assumed to be in long format. |
|
| 20 |
#' @param valueCol Column with phenotype values, defaults to "value". |
|
| 21 |
#' This should generally not need to be changed from the default. |
|
| 22 |
#' @return A dataframe with cumulative sum columns added for specified phenotypes |
|
| 23 |
#' @keywords single-value-traits |
|
| 24 |
#' @importFrom stats setNames |
|
| 25 |
#' @examples |
|
| 26 |
#' \donttest{
|
|
| 27 |
#' sv <- read.pcv( |
|
| 28 |
#' "https://raw.githubusercontent.com/joshqsumner/pcvrTestData/main/pcv4-single-value-traits.csv", |
|
| 29 |
#' reader = "fread" |
|
| 30 |
#' ) |
|
| 31 |
#' sv$genotype <- substr(sv$barcode, 3, 5) |
|
| 32 |
#' sv$genotype <- ifelse(sv$genotype == "002", "B73", |
|
| 33 |
#' ifelse(sv$genotype == "003", "W605S", |
|
| 34 |
#' ifelse(sv$genotype == "004", "MM", "Mo17") |
|
| 35 |
#' ) |
|
| 36 |
#' ) |
|
| 37 |
#' sv$fertilizer <- substr(sv$barcode, 8, 8) |
|
| 38 |
#' sv$fertilizer <- ifelse(sv$fertilizer == "A", "100", |
|
| 39 |
#' ifelse(sv$fertilizer == "B", "50", "0") |
|
| 40 |
#' ) |
|
| 41 |
#' |
|
| 42 |
#' sv <- bw.time(sv, |
|
| 43 |
#' plantingDelay = 0, phenotype = "area_pixels", cutoff = 10, |
|
| 44 |
#' timeCol = "timestamp", group = c("barcode", "rotation"), plot = TRUE
|
|
| 45 |
#' )$data |
|
| 46 |
#' sv <- bw.outliers(sv, |
|
| 47 |
#' phenotype = "area_pixels", group = c("DAS", "genotype", "fertilizer"),
|
|
| 48 |
#' plotgroup = c("barcode", "rotation")
|
|
| 49 |
#' )$data |
|
| 50 |
#' phenotypes <- colnames(sv)[19:35] |
|
| 51 |
#' phenoForm <- paste0("cbind(", paste0(phenotypes, collapse = ", "), ")")
|
|
| 52 |
#' groupForm <- "DAS+DAP+barcode+genotype+fertilizer" |
|
| 53 |
#' form <- as.formula(paste0(phenoForm, "~", groupForm)) |
|
| 54 |
#' sv <- aggregate(form, data = sv, mean, na.rm = TRUE) |
|
| 55 |
#' pixels_per_cmsq <- 42.5^2 # pixel per cm^2 |
|
| 56 |
#' sv$area_cm2 <- sv$area_pixels / pixels_per_cmsq |
|
| 57 |
#' sv$height_cm <- sv$height_pixels / 42.5 |
|
| 58 |
#' df <- sv |
|
| 59 |
#' phenotypes <- c("area_cm2", "height_cm")
|
|
| 60 |
#' group <- c("barcode")
|
|
| 61 |
#' timeCol <- "DAS" |
|
| 62 |
#' df <- cumulativePheno(df, phenotypes, group, timeCol) |
|
| 63 |
#' |
|
| 64 |
#' |
|
| 65 |
#' sv_l <- read.pcv( |
|
| 66 |
#' "https://raw.githubusercontent.com/joshqsumner/pcvrTestData/main/pcv4-single-value-traits.csv", |
|
| 67 |
#' mode = "long", reader = "fread" |
|
| 68 |
#' ) |
|
| 69 |
#' sv_l$genotype <- substr(sv_l$barcode, 3, 5) |
|
| 70 |
#' sv_l$genotype <- ifelse(sv_l$genotype == "002", "B73", |
|
| 71 |
#' ifelse(sv_l$genotype == "003", "W605S", |
|
| 72 |
#' ifelse(sv_l$genotype == "004", "MM", "Mo17") |
|
| 73 |
#' ) |
|
| 74 |
#' ) |
|
| 75 |
#' sv_l$fertilizer <- substr(sv_l$barcode, 8, 8) |
|
| 76 |
#' sv_l$fertilizer <- ifelse(sv_l$fertilizer == "A", "100", |
|
| 77 |
#' ifelse(sv_l$fertilizer == "B", "50", "0") |
|
| 78 |
#' ) |
|
| 79 |
#' sv_l <- bw.time(sv_l, |
|
| 80 |
#' plantingDelay = 0, phenotype = "area_pixels", cutoff = 10, |
|
| 81 |
#' timeCol = "timestamp", group = c("barcode", "rotation")
|
|
| 82 |
#' )$data |
|
| 83 |
#' sv_l <- cumulativePheno(sv_l, |
|
| 84 |
#' phenotypes = c("area_pixels", "height_pixels"),
|
|
| 85 |
#' group = c("barcode", "rotation"), timeCol = "DAS"
|
|
| 86 |
#' ) |
|
| 87 |
#' } |
|
| 88 |
#' |
|
| 89 |
#' @export |
|
| 90 |
#' |
|
| 91 |
cumulativePheno <- function(df, phenotypes = NULL, group = "barcode", timeCol = "DAS",
                            traitCol = "trait", valueCol = "value") {
  # Data is treated as long format only when both the trait and value columns
  # are present; otherwise each phenotype is assumed to be its own column.
  wide <- !all(c(traitCol, valueCol) %in% colnames(df))

  # Collapse multiple grouping columns into a single interaction column.
  if (length(group) > 1) {
    df$GROUP <- as.character(interaction(df[, group]))
    group <- "GROUP"
  }

  # Split once and reuse in both branches (previously the long-format branch
  # recomputed split(df, df[[group]]) after assigning dat_sp, duplicating the
  # work).
  dat_sp <- split(df, df[[group]])

  if (!wide) {
    # Long format: for each group, append rows holding the cumulative sum of
    # each phenotype, labeled "<phenotype>_csum" in the trait column. Rows
    # are used in their existing order (no sort by timeCol, matching prior
    # behavior).
    out <- do.call(rbind, lapply(dat_sp, function(d) {
      newRows <- do.call(rbind, lapply(phenotypes, function(pheno) {
        di <- d[d[[traitCol]] == pheno, ]
        di[[valueCol]] <- cumsum(di[[valueCol]])
        di[[traitCol]] <- paste0(pheno, "_csum")
        di
      }))
      rbind(d, newRows)
    }))
  } else {
    # Wide format: sort each group by time, then add one "<phenotype>_csum"
    # column per phenotype.
    out <- do.call(rbind, lapply(dat_sp, function(d) {
      d <- d[sort(d[[timeCol]], index.return = TRUE)$ix, ]
      d2 <- setNames(as.data.frame(do.call(cbind, lapply(phenotypes, function(pheno) {
        cumsum(d[[pheno]])
      }))), paste0(phenotypes, "_csum"))
      cbind(d, d2)
    }))
  }
  return(out)
}
| 1 |
#' Multi Value Trait Aggregation function |
|
| 2 |
#' |
|
| 3 |
#' @description EMD can get very heavy with large datasets. For an example |
|
| 4 |
#' lemnatech dataset filtering for images from every 5th day there are |
|
| 5 |
#' 6332^2 = 40,094,224 pairwise EMD values. In long format that's a 40 million row dataframe, |
|
| 6 |
#' which is unwieldy. This function is to help reduce the size of datasets before |
|
| 7 |
#' comparing histograms and moving on with matrix methods or network analysis. |
|
| 8 |
#' |
|
| 9 |
#' @param df A dataframe with multi value traits. This can be in wide or long format, |
|
| 10 |
#' data is assumed to be long if traitCol, valueCol, and labelCol are present. |
|
| 11 |
#' @param group Vector of column names for variables which uniquely identify groups |
|
| 12 |
#' in the data to summarize data over. Typically this would be the design variables |
|
| 13 |
#' and a time variable. |
|
| 14 |
#' @param mvCols Either a vector of column names/positions representing multi value |
|
| 15 |
#' traits or a character string that identifies the multi value trait columns as a |
|
| 16 |
#' regex pattern. Defaults to "frequencies". |
|
| 17 |
#' @param n_per_group Number of rows to return for each group. |
|
| 18 |
#' @param outRows Optionally this is a different way to specify how many rows to return. |
|
| 19 |
#' This will often not be exact so that groups have the same number of observations each. |
|
| 20 |
#' @param keep A vector of single value traits to also average over groups, if there are |
|
| 21 |
#' a mix of single and multi value traits in your data. |
|
| 22 |
#' @param parallel Optionally the groups can be run in parallel with this number of cores, |
|
| 23 |
#' defaults to 1 if the "mc.cores" option is not set globally. |
|
| 24 |
#' @param traitCol Column with phenotype names, defaults to "trait". |
|
| 25 |
#' @param labelCol Column with phenotype labels (units), defaults to "label". |
|
| 26 |
#' @param valueCol Column with phenotype values, defaults to "value". |
|
| 27 |
#' @param id Column that uniquely identifies images if the data is in long format. |
|
| 28 |
#' This is ignored when data is in wide format. |
|
| 29 |
#' @keywords emd multi-value-trait |
|
| 30 |
#' @import parallel |
|
| 31 |
#' @importFrom stats setNames aggregate as.formula |
|
| 32 |
#' @examples |
|
| 33 |
#' |
|
| 34 |
#' s1 <- mvSim( |
|
| 35 |
#' dists = list(runif = list(min = 15, max = 150)), |
|
| 36 |
#' n_samples = 10, |
|
| 37 |
#' counts = 1000, |
|
| 38 |
#' min_bin = 1, |
|
| 39 |
#' max_bin = 180, |
|
| 40 |
#' wide = TRUE |
|
| 41 |
#' ) |
|
| 42 |
#' mv_ag(s1, group = "group", mvCols = "sim_", n_per_group = 2) |
|
| 43 |
#' |
|
| 44 |
#' @return Returns a dataframe summarized by the specified groups over the multi-value traits. |
|
| 45 |
#' @export |
|
| 46 | ||
| 47 |
mv_ag <- function(df, group, mvCols = "frequencies", n_per_group = 1, outRows = NULL, keep = NULL,
                  parallel = getOption("mc.cores", 1),
                  traitCol = "trait", labelCol = "label", valueCol = "value", id = "image") {
  #* ***** [decide if data is long or wide]
  # Long data must have all three of trait/value/label columns; wide data has
  # none of them. A partial match is ambiguous and is an error.
  long_cols_present <- c(traitCol, valueCol, labelCol) %in% colnames(df)
  if (all(long_cols_present)) {
    long <- TRUE
  } else if (!any(long_cols_present)) {
    long <- FALSE
  } else {
    found <- c("traitCol", "valueCol", "labelCol")[which(long_cols_present)]
    stop(paste0(
      paste(found, collapse = ", "),
      " found in column names of data but either all or none of traitCol, ",
      "valueCol, and labelCol are expected."
    ))
  }

  #* ***** [combine grouping variables when several were given]
  multi_group <- FALSE
  if (length(group) > 1) {
    original_group <- group
    df$INTERNAL_MULTI_GROUP <- as.character(interaction(df[, group]))
    group <- "INTERNAL_MULTI_GROUP"
    multi_group <- TRUE
  }

  #* ***** [identify multi-value columns / trait]
  if (long) {
    # In long data mvCols is a regex that must match exactly one trait.
    df <- df[grepl(mvCols, df[[traitCol]]), ]
    if (length(unique(df[[traitCol]])) > 1) {
      stop(paste0(
        "In long format mvCols should only match one trait, ",
        mvCols, " matches ", paste0(unique(df[[traitCol]]), collapse = ", ")
      ))
    }
  } else {
    # In wide data mvCols may be a regex, column names, or column positions.
    if (length(mvCols) == 1 && is.character(mvCols)) {
      mvCols <- colnames(df)[grepl(mvCols, colnames(df))]
    }
    if (is.numeric(mvCols)) {
      mvCols <- colnames(df)[mvCols]
    }
  }

  #* ***** [split data by group]
  dat_sp <- split(x = df, f = df[[group]])
  # outRows overrides n_per_group: spread the requested total over groups.
  if (!is.null(outRows)) {
    n_per_group <- round(length(dat_sp) / outRows)
  }

  #* ***** [aggregate]
  if (long) {
    out <- .mv_long_ag(dat_sp, traitCol, valueCol, labelCol, n_per_group, parallel, group, keep, id)
  } else {
    out <- .mv_wide_ag(dat_sp, mvCols, n_per_group, parallel, group, keep)
  }

  #* ***** [split the interaction column back into the original group columns]
  if (multi_group) {
    group_df <- setNames(data.frame(out[[group]]), "group")
    group_df <- setNames(as.data.frame(
      do.call(rbind, lapply(
        group_df$group,
        function(s) {
          matrix(strsplit(s, split = "[.]")[[1]], nrow = 1)
        }
      ))
    ), original_group)
    out <- cbind(group_df, out[, -which(colnames(out) == "INTERNAL_MULTI_GROUP")])
  }

  return(out)
}
|
| 129 | ||
| 130 |
#' internal helper for wide aggregation |
|
| 131 |
#' @keywords internal |
|
| 132 |
#' @noRd |
|
| 133 | ||
| 134 |
.mv_wide_ag <- function(dat_sp, mvCols, n_per_group, parallel, group, keep) {
  out <- do.call(rbind, parallel::mclapply(dat_sp, function(d) {
    hist_mat <- as.matrix(d[, mvCols], rownames.force = TRUE)
    # Rescale each histogram so its bins sum to 1 before averaging.
    hist_mat <- hist_mat / rowSums(hist_mat)
    # Never request more output rows than there are input rows.
    iter_n <- min(nrow(hist_mat), n_per_group)
    rownames(hist_mat) <- seq_len(nrow(hist_mat))
    # Shuffle row names so the sub-groups that get averaged are random.
    shuffled <- sample(rownames(hist_mat), nrow(hist_mat), replace = FALSE)
    if (nrow(hist_mat) > 1 & iter_n > 1) {
      bins <- cut(seq_len(nrow(hist_mat)), iter_n)
      nms_split <- split(shuffled, bins)
    } else {
      nms_split <- list(rownames(hist_mat))
    }

    # Column means of each chunk of rescaled histograms; chunks are always
    # numeric so colMeans handles the single-row case identically.
    mv_ag <- data.frame(do.call(rbind, lapply(nms_split, function(rwnms) {
      chunk <- matrix(hist_mat[rwnms, ], nrow = length(rwnms))
      matrix(colMeans(chunk), nrow = 1)
    })))
    colnames(mv_ag) <- mvCols

    # Optionally average the requested single-value traits over the same
    # chunks of rows.
    if (!is.null(keep)) {
      kept <- data.frame(do.call(rbind, lapply(nms_split, function(rwnms) {
        kp <- as.matrix(d[rwnms, keep])
        if (nrow(kp) > 1) {
          matrix(colMeans(kp), nrow = 1)
        } else {
          kp
        }
      })))
      colnames(kept) <- keep
      mv_ag <- cbind(kept, mv_ag)
    }

    # Re-attach the group identifier for every output row.
    mv_ag <- cbind(setNames(data.frame(rep(d[1, group], nrow(mv_ag))), group), mv_ag)
    return(mv_ag)
  }, mc.cores = parallel))
  return(out)
}
|
| 180 | ||
| 181 |
#' internal helper for long data aggregation
#'
#' For each group, rescales each image's multi-value trait so its values sum
#' to 1, splits the images into at most `n_per_group` chunks, and averages
#' each bin within a chunk via aggregate().
#'
#' @param dat_sp list of data.frames, one per group (output of split()).
#' @param traitCol column containing trait names.
#' @param valueCol column containing histogram counts.
#' @param labelCol column containing histogram bins.
#' @param n_per_group target number of aggregated images per group.
#' @param parallel number of cores passed to parallel::mclapply.
#' @param group column name identifying the group.
#' @param keep optional trait names to carry through unaggregated.
#' @param id column identifying individual images/replicates.
#' @return A long data.frame of per-chunk mean values per bin.
#' @keywords internal
#' @noRd
.mv_long_ag <- function(dat_sp, traitCol, valueCol, labelCol, n_per_group, parallel, group, keep, id) {
  out <- do.call(rbind, parallel::mclapply(dat_sp, function(d) {
    #* get unique images
    IDS <- unique(d[[id]])
    #* number of chunks cannot exceed the number of images
    iter_n <- min(length(IDS), n_per_group)
    #* rescale values within each image to sum to 1
    d <- do.call(rbind, lapply(IDS, function(i) {
      # Compare against the single current id `i`; comparing to the whole IDS
      # vector recycles elementwise and selects the wrong rows.
      id_d <- d[d[[id]] == i, ]
      id_d[[valueCol]] <- id_d[[valueCol]] / sum(id_d[[valueCol]], na.rm = TRUE)
      return(id_d)
    }))
    #* separate IDS into chunks
    if (length(IDS) > 1 && iter_n > 1) {
      index <- cut(seq_along(IDS), iter_n)
      ids_split <- split(IDS, index)
    } else {
      ids_split <- list(IDS)
    }
    #* mean of each bin per chunk
    mv_ag <- data.frame(do.call(rbind, lapply(ids_split, function(ids) {
      d_group <- d[d[[id]] %in% ids, ]
      aggregate(
        as.formula(paste0(
          valueCol, "~",
          paste0(c(labelCol, traitCol, group), collapse = "+")
        )),
        d_group, mean,
        na.rm = TRUE
      )
    })))
    #* add back in kept traits
    if (!is.null(keep)) {
      kept <- d[d[[traitCol]] %in% keep, ]
      mv_ag <- cbind(mv_ag, kept)
    }
    rownames(mv_ag) <- NULL
    return(mv_ag)
  }, mc.cores = parallel))
  return(out)
}
| 1 |
#' Function to help fulfill elements of the Bayesian Analysis Reporting Guidelines. |
|
| 2 |
#' |
|
| 3 |
#' The Bayesian Analysis Reporting Guidelines were put forward by Kruschke |
|
| 4 |
#' (https://www.nature.com/articles/s41562-021-01177-7) to aide in reproducibility and documentation |
|
| 5 |
#' of Bayesian statistical analyses that are sometimes unfamiliar to reviewers or scientists. |
|
| 6 |
#' The purpose of this function is to summarize goodness of fit metrics from one or more Bayesian models |
|
| 7 |
#' made by \link{growthSS} and \link{fitGrowth}. See details for explanations of those metrics and
|
|
| 8 |
#' the output. |
|
| 9 |
#' |
|
| 10 |
#' @param fit The brmsfit object or a list of brmsfit objects in the case that you split models to run |
|
| 11 |
#' on subsets of the data for computational simplicity. |
|
| 12 |
#' @param ss The growthSS output used to specify the model. If fit is a list then this can either be one |
|
| 13 |
#' growthSS list in which case the priors are assumed to be the same for each model or it can be a list |
|
| 14 |
#' of the same length as fit. Note that the only parts of this which are used are the \code{call$start}
|
|
| 15 |
#' which is expected to be a call, \code{pcvrForm}, and \code{df} list elements,
|
|
| 16 |
#' so if you have a list of brmsfit objects and no ss object you can specify a stand-in list. This |
|
| 17 |
#' can also be left NULL (the default) and posterior predictive plots and prior predictive plots will |
|
| 18 |
#' not be made. |
|
| 19 |
#' |
|
| 20 |
#' |
|
| 21 |
#' @details |
|
| 22 |
#' |
|
| 23 |
#' |
|
| 24 |
#' \itemize{
|
|
| 25 |
#' \item \bold{General}: This includes chain number, length, and total divergent transitions per
|
|
| 26 |
#' model. Divergent transitions are a marker that the MCMC had something go wrong. |
|
| 27 |
#' Conceptually it may be helpful to think about rolling a marble over a 3D curve then having the |
|
| 28 |
#' marble suddenly jolt in an unexpected direction, something happened that suggests a |
|
| 29 |
#' problem/misunderstood surface. In practice you want extremely few (ideally no) divergences. |
|
| 30 |
#' If you do have divergences then consider specifying more control parameters |
|
| 31 |
#' (see brms::brm or examples for \link{fitGrowth}). If the problem persists then the model may need
|
|
| 32 |
#' to be simplified. For more information on MCMC and divergence see the stan |
|
| 33 |
#' manual (https://mc-stan.org/docs/2_19/reference-manual/divergent-transitions). |
|
| 34 |
#' |
|
| 35 |
#' \item \bold{ESS}: ESS stands for Effective Sample Size and is a goodness of fit metric that
|
|
| 36 |
#' approximates the number of independent replicates that would equate to the same amount of |
|
| 37 |
#' information as the (autocorrelated) MCMC iterations. ESS of 1000+ is often considered as a pretty |
|
| 38 |
#' stable value, but more is better. Still, 100 per chain may be plenty depending on your |
|
| 39 |
#' applications and the inference you wish to do. One of the benefits to using lots of chains and/or |
|
| 40 |
#' longer chains is that you will get more complete information and that benefit will be shown by a |
|
| 41 |
#' larger ESS. This is separated into "bulk" and "tail" to represent the middle and tails of the |
|
| 42 |
#' posterior distribution, since those can sometimes have very different sampling behavior. |
|
| 43 |
#' A summary and the total values are returned, with the summary being useful if several models are |
|
| 44 |
#' included in a list for fit argument |
|
| 45 |
#' |
|
| 46 |
#' \item \bold{Rhat}: Rhat is a measure of "chain mixture". It compares the between vs within chain
|
|
| 47 |
#' values to assess how well the chains mixed. If chains did not mix well then Rhat will be greater |
|
| 48 |
#' than 1, with 1.05 being a broadly |
|
| 49 |
#' agreed upon cutoff to signify a problem. Running longer chains should result in lower Rhat |
|
| 50 |
#' values. The default in brms is to run 4 chains, partially to ensure that there is a good chance |
|
| 51 |
#' to check that the chains mixed well via Rhat. A summary and the total values are returned, with |
|
| 52 |
#' the summary being useful if several models are included in a list for fit argument |
|
| 53 |
#' |
|
| 54 |
#' \item \bold{NEFF}: NEFF is the NEFF ratio (Effective Sample Size over Total MCMC Sample Size).
|
|
| 55 |
#' Values greater than 0.5 are generally considered good, but there is a consensus that lower can be |
|
| 56 |
#' fine down to about 0.1. A summary and the total values are returned, with the summary being |
|
| 57 |
#' useful if several models are included in a list for fit argument |
|
| 58 |
#' |
|
| 59 |
#' \item \bold{priorPredictive}: A plot of data simulated from the prior using \link{plotPrior}.
|
|
| 60 |
#' This should generate data that is biologically plausible for your situation, but it will |
|
| 61 |
#' probably be much more variable than your data. That is the effect of the mildly informative thick |
|
| 62 |
#' tailed lognormal priors. If you specified non-default style priors then this currently will not |
|
| 63 |
#' work. |
|
| 64 |
#' |
|
| 65 |
#' \item \bold{posteriorPredictive}: A plot of each model's posterior predictive interval over time.
|
|
| 66 |
#' This is the same as plots returned from \link{growthPlot} and shows 1-99% intervals in purple
|
|
| 67 |
#' coming to a mean yellow trend line. These should encompass the overwhelming majority of your data |
|
| 68 |
#' and ideally match the variance pattern that you see in your data. If parts of the predicted |
|
| 69 |
#' interval are biologically impossible (area below 0, percentage about 100%, etc) then your chosen |
|
| 70 |
#' model should be reconsidered. |
|
| 71 |
#' } |
|
| 72 |
#' |
|
| 73 |
#' |
|
| 74 |
#' |
|
| 75 |
#' |
|
| 76 |
#' @keywords Bayesian brms prior |
|
| 77 |
#' @return A named list containing Rhat, ESS, NEFF, and Prior/Posterior Predictive plots. |
|
| 78 |
#' See details for interpretation. |
|
| 79 |
#' @importFrom rlang is_installed |
|
| 80 |
#' |
|
| 81 |
#' @examples |
|
| 82 |
#' |
|
| 83 |
#' \donttest{
|
|
| 84 |
#' simdf <- growthSim("logistic",
|
|
| 85 |
#' n = 20, t = 25, |
|
| 86 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 87 |
#' ) |
|
| 88 |
#' ss <- growthSS( |
|
| 89 |
#' model = "logistic", form = y ~ time | id / group, sigma = "logistic", |
|
| 90 |
#' df = simdf, start = list( |
|
| 91 |
#' "A" = 130, "B" = 12, "C" = 3, |
|
| 92 |
#' "sigmaA" = 20, "sigmaB" = 10, "sigmaC" = 2 |
|
| 93 |
#' ), type = "brms" |
|
| 94 |
#' ) |
|
| 95 |
#' fit_test <- fitGrowth(ss, |
|
| 96 |
#' iter = 600, cores = 1, chains = 1, backend = "cmdstanr", |
|
| 97 |
#' sample_prior = "only" # only sampling from prior for speed |
|
| 98 |
#' ) |
|
| 99 |
#' barg(fit_test, ss) |
|
| 100 |
#' fit_2 <- fit_test |
|
| 101 |
#' fit_list <- list(fit_test, fit_2) |
|
| 102 |
#' x <- barg(fit_list, list(ss, ss)) |
|
| 103 |
#' } |
|
| 104 |
#' |
|
| 105 |
#' @export |
|
| 106 | ||
| 107 |
barg <- function(fit, ss = NULL) {
  # Collects Bayesian Analysis Reporting Guidelines (BARG) diagnostics for one
  # or more brmsfit models: general chain info and divergences, Rhat, NEFF
  # ratios, ESS, and (when `ss` is usable) prior/posterior predictive plots.
  out <- list()
  #* `format everything into lists`
  # Wrap a single brmsfit so the rest of the function can treat `fit`
  # uniformly as a list of models.
  if (methods::is(fit, "brmsfit")) {
    fitList <- list(fit)
  } else {
    fitList <- fit
  }
  # If `ss` looks like a single growthSS output (first element named
  # "formula"), replicate it once per model; otherwise assume it is already a
  # list of growthSS outputs parallel to fitList (or NULL).
  if (!is.null(names(ss)) && names(ss)[1] == "formula") {
    ssList <- lapply(seq_along(fitList), function(i) {
      ss
    })
  } else {
    ssList <- ss
  }
  #* `General Info`
  # One row per model: chain count, iterations, and divergent transitions.
  general <- do.call(rbind, lapply(seq_along(fitList), function(i) {
    fitobj <- fitList[[i]]
    ms <- summary(fitobj)
    data.frame(
      chains = ms$chains,
      iter = ms$iter,
      num.divergent = rstan::get_num_divergent(fitobj$fit),
      model = i
    )
  }))
  out[["General"]] <- general
  #* `Rhat summary`
  rhats <- do.call(rbind, lapply(fitList, function(fitobj) {
    brms::rhat(fitobj)
  }))
  # Column-wise summary of Rhat values across models.
  rhat_metrics <- apply(rhats, MARGIN = 2, summary)
  # NOTE(review): rbind() of named numeric vectors yields a matrix and `$<-`
  # is not defined for matrices -- confirm brms::rhat() returns a data.frame
  # here, or this assignment may error with more than zero models.
  rhats$model <- seq_along(fitList)
  out[["Rhat"]][["summary"]] <- rhat_metrics
  out[["Rhat"]][["complete"]] <- rhats
  #* `NEFF summary`
  # NEFF ratio = effective sample size / total MCMC sample size per parameter.
  neff <- do.call(rbind, lapply(fitList, function(fitobj) {
    brms::neff_ratio(fitobj)
  }))
  neff_metrics <- apply(neff, MARGIN = 2, summary)
  # NOTE(review): same matrix-vs-data.frame concern as for `rhats` above.
  neff$model <- seq_along(fitList)
  out[["NEFF"]][["summary"]] <- neff_metrics
  out[["NEFF"]][["complete"]] <- neff
  #* `ESS Summary`
  # Per-parameter bulk and tail ESS pulled from each model's summary tables
  # (fixed effects plus distributional/special parameters).
  ess <- do.call(rbind, lapply(seq_along(fitList), function(i) {
    fit <- fitList[[i]]
    ms <- summary(fit)
    data.frame(
      "par" = c(rownames(ms$fixed), rownames(ms$spec_pars)),
      "Bulk_ESS" = c(ms$fixed$Bulk_ESS, ms$spec_pars$Bulk_ESS),
      "Tail_ESS" = c(ms$fixed$Tail_ESS, ms$spec_pars$Tail_ESS),
      "model" = i
    )
  }))
  # Summarize ESS per parameter across models, transposed so parameters are
  # columns; bulk and tail summaries are stacked row-wise.
  ag_b_ess <- aggregate(Bulk_ESS ~ par, ess, summary)
  tag_b_ess <- t(ag_b_ess[-1])
  colnames(tag_b_ess) <- ag_b_ess[, 1]
  ag_t_ess <- aggregate(Tail_ESS ~ par, ess, summary)
  tag_t_ess <- t(ag_t_ess[-1])
  colnames(tag_t_ess) <- ag_t_ess[, 1]
  ess_metrics <- rbind(tag_b_ess, tag_t_ess)
  out[["ESS"]][["summary"]] <- ess_metrics
  out[["ESS"]][["complete"]] <- ess
  #* `Prior Predictive Check`
  # Only possible when the stored growthSS call's `start` argument evaluates
  # to a list of default-style priors.
  if (methods::is(eval(ssList[[1]]$call$start), "list")) {
    pri_preds <- lapply(seq_along(ssList), function(i) {
      ss <- ssList[[i]]
      # Extract the x (time) variable name from the RHS of the pcvr formula,
      # dropping any `|` id and `/` group terms, then simulate from the prior
      # over the observed time range.
      x <- trimws(gsub("[|].*|[/].*", "", as.character(ss$pcvrForm)[3]))
      t <- max(ss$df[[x]])
      pri_pred <- plotPrior(priors = eval(ss$call$start), type = ss$model, n = 200, t = t)
      pri_pred$simulated
    })
    out[["priorPredictive"]] <- pri_preds
  }
  #* `Posterior Predictive Check`
  # One posterior predictive plot per model via brmPlot, when a formula is
  # available in the (possibly replicated) ss list.
  if (methods::is(eval(ssList[[1]]$pcvrForm), "formula")) {
    post_preds <- lapply(seq_along(fitList), function(i) {
      brmPlot(fitList[[i]], form = ssList[[i]]$pcvrForm, df = ssList[[i]]$df)
    })
    out[["posteriorPredictive"]] <- post_preds
  }
  return(out)
}
| 1 |
#' @description |
|
| 2 |
#' Internal function for Bayesian T Tests of gaussian data represented by single value traits. |
|
| 3 |
#' @param s1 A vector of numerics drawn from a gaussian distribution. |
|
| 4 |
#' @examples |
|
| 5 |
#' \donttest{
|
|
| 6 |
#' .conj_t_sv( |
|
| 7 |
#' s1 = rnorm(100, 50, 10), s2 = rnorm(100, 60, 12), |
|
| 8 |
#' priors = list(mu = c(0, 0), n = c(1, 1), s2 = c(20, 20)), |
|
| 9 |
#' plot = FALSE, rope_range = c(-0.1, 0.1), rope_ci = 0.89, |
|
| 10 |
#' cred.int.level = 0.89, hypothesis = "equal", support = NULL |
|
| 11 |
#' ) |
|
| 12 |
#' } |
|
| 13 |
#' @keywords internal |
|
| 14 |
#' @noRd |
|
| 15 |
.conj_t_sv <- function(s1 = NULL, priors = NULL,
                       plot = FALSE, support = NULL, cred.int.level = NULL,
                       calculatingSupport = FALSE) {
  # Conjugate update for gaussian single-value traits using the sampling
  # distribution of the mean (T-test analog). Returns posterior summary,
  # parameters, draws, and pdf; or just support quantiles when
  # calculatingSupport is TRUE and no support is given.
  res <- list()
  # Fall back to a vague default prior when none is supplied.
  if (is.null(priors)) {
    priors <- list(mu = 0, n = 1, s2 = 100)
  }
  # Observed sample moments.
  n_obs <- length(s1)
  xbar <- mean(s1)
  samp_var <- var(s1)
  # Combine prior pseudo-observations with the data.
  prior_df <- priors$n[1] - 1
  post_n <- priors$n[1] + n_obs
  post_mean <- (n_obs * xbar + priors$n[1] * priors$mu[1]) / post_n
  post_df <- prior_df + n_obs
  post_var <- ((n_obs - 1) * samp_var + prior_df * priors$s2[1] +
    priors$n[1] * n_obs * (priors$mu[1] - xbar)^2 / post_n) / post_df
  post_se <- sqrt(post_var / post_n) # standard error of the mean
  # Early exit: report the support range instead of the full posterior.
  if (is.null(support) && calculatingSupport) {
    return(qlst(c(0.0001, 0.9999), post_df, post_mean, post_se))
  }
  dens <- extraDistr::dlst(support, post_df, post_mean, post_se)
  pdf1 <- dens / sum(dens)
  tail_p <- (1 - cred.int.level) / 2
  hdi_bounds <- post_mean + qt(c(tail_p, 1 - tail_p), post_df) * post_se
  res$summary <- data.frame(HDE_1 = post_mean, HDI_1_low = hdi_bounds[1], HDI_1_high = hdi_bounds[2])
  res$posterior$mu <- post_mean
  res$posterior$n <- post_n
  res$posterior$s2 <- post_var # return variance
  # Posterior draws from the location-scale t distribution.
  res$posteriorDraws <- extraDistr::rlst(10000, post_df, post_mean, post_se)
  res$pdf <- pdf1
  # Optional data for downstream plotting.
  if (plot) {
    res$plot_df <- data.frame(
      "range" = support,
      "prob" = pdf1,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(res)
}
|
| 62 | ||
| 63 | ||
| 64 | ||
| 65 | ||
| 66 | ||
| 67 |
#' @description |
|
| 68 |
#' Internal function for Bayesian T Tests of gaussian data represented by multi value
|
| 69 |
#' traits. |
|
| 70 |
#' @param s1 A data.frame or matrix of multi value traits. The column names should include a number |
|
| 71 |
#' representing the "bin". |
|
| 72 |
#' @examples |
|
| 73 |
#' \donttest{
|
|
| 74 |
#' mv_gauss <- mvSim( |
|
| 75 |
#' dists = list( |
|
| 76 |
#' rnorm = list(mean = 50, sd = 10) |
|
| 77 |
#' ), |
|
| 78 |
#' n_samples = 30 |
|
| 79 |
#' ) |
|
| 80 |
#' .conj_t_mv( |
|
| 81 |
#' s1 = mv_gauss[1:30, -1], |
|
| 82 |
#' priors = NULL, |
|
| 83 |
#' plot = TRUE, |
|
| 84 |
#' cred.int.level = 0.89 |
|
| 85 |
#' ) |
|
| 86 |
#' } |
|
| 87 |
#' @keywords internal |
|
| 88 |
#' @noRd |
|
| 89 | ||
| 90 |
.conj_t_mv <- function(s1 = NULL, priors = NULL,
                       plot = FALSE, support = NULL, cred.int.level = NULL,
                       calculatingSupport = FALSE) {
  # Conjugate update for gaussian multi-value (histogram) traits using the
  # sampling distribution of the mean. The histogram is expanded to a vector
  # of bin values weighted by counts before the standard update.
  res <- list()
  # Fall back to a vague default prior when none is supplied.
  if (is.null(priors)) {
    priors <- list(mu = 0, n = 1, s2 = 100)
  }
  # Put histogram columns into numeric bin order (bin number is parsed from
  # the column name).
  histColsBin <- as.numeric(sub("[a-zA-Z_.]+", "", colnames(s1)))
  bins_order <- sort(histColsBin, index.return = TRUE)$ix
  s1 <- s1[, bins_order]
  # Expand the histogram into raw observations: each bin value repeated by
  # its (rounded) total count.
  X1 <- rep(histColsBin[bins_order], as.numeric(round(colSums(s1))))
  # Sample moments; n is the number of histogram rows, not observations.
  n_obs <- nrow(s1)
  xbar <- mean(X1)
  samp_var <- var(X1)
  # Combine prior pseudo-observations with the data.
  prior_df <- priors$n[1] - 1
  post_n <- priors$n[1] + n_obs
  post_mean <- (n_obs * xbar + priors$n[1] * priors$mu[1]) / post_n
  post_df <- prior_df + n_obs
  post_var <- ((n_obs - 1) * samp_var + prior_df * priors$s2[1] +
    priors$n[1] * n_obs * (priors$mu[1] - xbar)^2 / post_n) / post_df
  post_se <- sqrt(post_var / post_n) # standard error of the mean
  # Early exit: report the support range instead of the full posterior.
  if (is.null(support) && calculatingSupport) {
    return(qlst(c(0.0001, 0.9999), post_df, post_mean, post_se))
  }
  dens <- extraDistr::dlst(support, post_df, post_mean, post_se)
  pdf1 <- dens / sum(dens)
  tail_p <- (1 - cred.int.level) / 2
  hdi_bounds <- post_mean + qt(c(tail_p, 1 - tail_p), post_df) * post_se
  res$summary <- data.frame(HDE_1 = post_mean, HDI_1_low = hdi_bounds[1], HDI_1_high = hdi_bounds[2])
  res$posterior$mu <- post_mean
  res$posterior$n <- post_n
  res$posterior$s2 <- post_var
  # Posterior draws from the location-scale t distribution.
  res$posteriorDraws <- extraDistr::rlst(10000, post_df, post_mean, post_se)
  res$pdf <- pdf1
  # Optional data for downstream plotting.
  if (plot) {
    res$plot_df <- data.frame(
      "range" = support,
      "prob" = pdf1,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(res)
}
| 1 |
#' helper function to help make growth models including an intercept term in growthSS |
|
| 2 |
#' @examples |
|
| 3 |
#' .intModelHelper("int_logistic")
|
|
| 4 |
#' .intModelHelper("logistic")
|
|
| 5 |
#' |
|
| 6 |
#' @keywords internal |
|
| 7 |
#' @noRd |
|
| 8 | ||
| 9 |
.intModelHelper <- function(model) {
  # Detect the "int_" marker that requests an explicit intercept term in a
  # growth model name, and strip it from the model string if present.
  has_intercept <- grepl("int_", model)
  if (has_intercept) {
    model <- gsub("int_", "", model)
  }
  return(list("model" = model, "int" = has_intercept))
}
| 1 |
#' @description |
|
| 2 |
#' Internal function for Bayesian comparisons of gaussian data represented by single value traits.
|
| 3 |
#' This version uses the entire posterior distribution instead of the sampling distribution of the mean. |
|
| 4 |
#' In frequentist terms this is analogous to a Z test as opposed to a T test. Generally the T test is |
|
| 5 |
#' desired, but this is provided for completeness. |
|
| 6 |
#' @param s1 A vector of numerics drawn from a gaussian distribution. |
|
| 7 |
#' @examples |
|
| 8 |
#' .conj_gaussian_sv( |
|
| 9 |
#' s1 = rnorm(100, 50, 10), |
|
| 10 |
#' priors = list(mu = c(0, 0), n = c(1, 1), s2 = c(20, 20)), |
|
| 11 |
#' plot = FALSE, support = NULL |
|
| 12 |
#' ) |
|
| 13 |
#' @keywords internal |
|
| 14 |
#' @noRd |
|
| 15 | ||
| 16 |
.conj_gaussian_sv <- function(s1 = NULL, priors = NULL,
                              plot = FALSE, support = NULL, cred.int.level = NULL,
                              calculatingSupport = FALSE) {
  # Conjugate update for gaussian single-value traits using the entire
  # posterior distribution (Z-test analog): the t distribution is scaled by
  # the posterior standard deviation rather than the standard error.
  res <- list()
  # Fall back to a vague default prior when none is supplied.
  if (is.null(priors)) {
    priors <- list(mu = 0, n = 1, s2 = 100)
  }
  # Observed sample moments.
  n_obs <- length(s1)
  xbar <- mean(s1)
  samp_var <- var(s1)
  # Combine prior pseudo-observations with the data.
  prior_df <- priors$n[1] - 1
  post_n <- priors$n[1] + n_obs
  post_mean <- (n_obs * xbar + priors$n[1] * priors$mu[1]) / post_n
  post_df <- prior_df + n_obs
  post_var <- ((n_obs - 1) * samp_var + prior_df * priors$s2[1] +
    priors$n[1] * n_obs * (priors$mu[1] - xbar)^2 / post_n) / post_df
  post_sd <- sqrt(post_var) # posterior standard deviation (not SE)
  # Early exit: report the support range instead of the full posterior.
  if (is.null(support) && calculatingSupport) {
    return(qlst(c(0.0001, 0.9999), post_df, post_mean, post_sd))
  }
  dens1 <- extraDistr::dlst(support, post_df, post_mean, post_sd)
  pdf1 <- dens1 / sum(dens1)
  tail_p <- (1 - cred.int.level) / 2
  hdi_bounds <- post_mean + qt(c(
    tail_p,
    1 - tail_p
  ), post_df) * post_sd
  res$summary <- data.frame(HDE_1 = post_mean, HDI_1_low = hdi_bounds[1], HDI_1_high = hdi_bounds[2])
  res$posterior$mu <- post_mean
  res$posterior$n <- post_n
  res$posterior$s2 <- post_var # return variance
  # Posterior draws from the location-scale t distribution.
  res$posteriorDraws <- extraDistr::rlst(10000, post_df, post_mean, post_sd)
  res$pdf <- pdf1
  # Optional data for downstream plotting.
  if (plot) {
    res$plot_df <- data.frame(
      "range" = support,
      "prob" = pdf1,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(res)
}
|
| 67 | ||
| 68 | ||
| 69 |
#' @description |
|
| 70 |
#' Internal function for Bayesian comparisons of gaussian data represented by multi value traits. |
|
| 71 |
#' This version uses the entire posterior distribution instead of the sampling distribution of the mean. |
|
| 72 |
#' In frequentist terms this is analogous to a Z test as opposed to a T test. Generally the T test is |
|
| 73 |
#' desired, but this is provided for completeness. |
|
| 74 |
#' @param s1 A vector of numerics drawn from a gaussian distribution. |
|
| 75 |
#' @keywords internal |
|
| 76 |
#' @noRd |
|
| 77 | ||
| 78 |
.conj_gaussian_mv <- function(s1 = NULL, priors = NULL,
                              plot = FALSE, support = NULL, cred.int.level = NULL,
                              calculatingSupport = FALSE) {
  # Conjugate update for gaussian multi-value (histogram) traits using the
  # entire posterior distribution (Z-test analog). The histogram is expanded
  # to a vector of bin values weighted by counts before the standard update.
  res <- list()
  # Fall back to a vague default prior when none is supplied.
  if (is.null(priors)) {
    priors <- list(mu = 0, n = 1, s2 = 100)
  }
  # Put histogram columns into numeric bin order (bin number is parsed from
  # the column name).
  histColsBin <- as.numeric(sub("[a-zA-Z_.]+", "", colnames(s1)))
  bins_order <- sort(histColsBin, index.return = TRUE)$ix
  s1 <- s1[, bins_order]
  # Expand the histogram into raw observations: each bin value repeated by
  # its (rounded) total count.
  X1 <- rep(histColsBin[bins_order], as.numeric(round(colSums(s1))))
  # Sample moments; n is the number of histogram rows, not observations.
  n_obs <- nrow(s1)
  xbar <- mean(X1)
  samp_var <- var(X1)
  # Combine prior pseudo-observations with the data.
  prior_df <- priors$n[1] - 1
  post_n <- priors$n[1] + n_obs
  post_mean <- (n_obs * xbar + priors$n[1] * priors$mu[1]) / post_n
  post_df <- prior_df + n_obs
  post_var <- ((n_obs - 1) * samp_var + prior_df * priors$s2[1] +
    priors$n[1] * n_obs * (priors$mu[1] - xbar)^2 / post_n) / post_df
  post_sd <- sqrt(post_var) # posterior standard deviation (not SE)
  # Early exit: report the support range instead of the full posterior.
  if (is.null(support) && calculatingSupport) {
    return(qlst(c(0.0001, 0.9999), post_df, post_mean, post_sd))
  }
  dens <- extraDistr::dlst(support, post_df, post_mean, post_sd)
  pdf1 <- dens / sum(dens)
  tail_p <- (1 - cred.int.level) / 2
  hdi_bounds <- post_mean + qt(c(tail_p, 1 - tail_p), post_df) * post_sd
  res$summary <- data.frame(HDE_1 = post_mean, HDI_1_low = hdi_bounds[1], HDI_1_high = hdi_bounds[2])
  res$posterior$mu <- post_mean
  res$posterior$n <- post_n
  res$posterior$s2 <- post_var
  # Posterior draws from the location-scale t distribution.
  res$posteriorDraws <- extraDistr::rlst(10000, post_df, post_mean, post_sd)
  res$pdf <- pdf1
  # Optional data for downstream plotting.
  if (plot) {
    res$plot_df <- data.frame(
      "range" = support,
      "prob" = pdf1,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(res)
}
| 1 |
#' Make Joyplots for multi value trait plantCV data |
|
| 2 |
#' |
|
| 3 |
#' @param df Data frame to use. Long or wide format is accepted. |
|
| 4 |
#' @param index If the data is long then this is a multi value trait as a |
|
| 5 |
#' character string that must be present in `trait`. |
|
| 6 |
#' If the data is wide then this is a string used to find column names to use from the wide data. |
|
| 7 |
#' In the wide case this should include the entire |
|
| 8 |
#' trait name (ie, "hue_frequencies" instead of "hue_freq"). |
|
| 9 |
#' @param group A length 1 or 2 character vector. |
|
| 10 |
#' This is used for faceting the joyplot and identifying groups for testing. |
|
| 11 |
#' If this is length 1 then no faceting is done. |
|
| 12 |
#' @param y Optionally a variable to use on the y axis. This is useful when you |
|
| 13 |
#' have three variables to display. This argument will change faceting behavior to |
|
| 14 |
#' add an additional layer of faceting (single length group will be faceted, |
|
| 15 |
#' length 2 group will be faceted group1 ~ group2). |
|
| 16 |
#' @param id Optionally a variable to show the outline of different replicates. |
|
| 17 |
#' Note that ggridges::geom_density_ridges_gradient does not support transparency, |
|
| 18 |
#' so if fillx is TRUE then only the outer line will show individual IDs. |
|
| 19 |
#' @param bin Column containing histogram (multi value trait) bins. Defaults to "label". |
|
| 20 |
#' @param freq Column containing histogram counts. Defaults to "value" |
|
| 21 |
#' @param trait Column containing phenotype names. Defaults to "trait". |
|
| 22 |
#' @param fillx Logical, whether or not to use \code{ggridges::geom_density_ridges_gradient}.
|
|
| 23 |
#' Default is T, if F then \code{ggridges::geom_density_ridges} is used instead,
|
|
| 24 |
#' with arbitrary fill. Note that \code{ggridges::geom_density_ridges_gradient}
|
|
| 25 |
#' may issue a message about deprecated ggplot2 features. |
|
| 26 |
#' @keywords multi-value-trait |
|
| 27 |
#' @import ggplot2 |
|
| 28 |
#' @import ggridges |
|
| 29 |
#' @import data.table |
|
| 30 |
#' @importFrom stats setNames density aggregate as.formula ks.test |
|
| 31 |
#' |
|
| 32 |
#' |
|
| 33 |
#' @return Returns a ggplot. |
|
| 34 |
#' |
|
| 35 |
#' @examples |
|
| 36 |
#' |
|
| 37 |
#' library(extraDistr) |
|
| 38 |
#' dists <- list( |
|
| 39 |
#' rmixnorm = list(mean = c(70, 150), sd = c(15, 5), alpha = c(0.3, 0.7)), |
|
| 40 |
#' rnorm = list(mean = 90, sd = 20), |
|
| 41 |
#' rlnorm = list(meanlog = log(40), sdlog = 0.5) |
|
| 42 |
#' ) |
|
| 43 |
#' x_wide <- mvSim( |
|
| 44 |
#' dists = dists, n_samples = 5, counts = 1000, |
|
| 45 |
#' min_bin = 1, max_bin = 180, wide = TRUE |
|
| 46 |
#' ) |
|
| 47 |
#' pcv.joyplot(x_wide, index = "sim", group = "group") |
|
| 48 |
#' x_long <- mvSim( |
|
| 49 |
#' dists = dists, n_samples = 5, counts = 1000, |
|
| 50 |
#' min_bin = 1, max_bin = 180, wide = FALSE |
|
| 51 |
#' ) |
|
| 52 |
#' x_long$trait <- "x" |
|
| 53 |
#' p <- pcv.joyplot(x_long, bin = "variable", group = "group") |
|
| 54 |
#' # we might want to display hues as their hue |
|
| 55 |
#' p + ggplot2::scale_fill_gradientn(colors = scales::hue_pal(l = 65)(360)) |
|
| 56 |
#' x_long$group2 <- "example" |
|
| 57 |
#' pcv.joyplot(x_long, bin = "variable", y = "group", fillx = FALSE) |
|
| 58 |
#' |
|
| 59 |
#' @export |
|
| 60 | ||
| 61 |
pcv.joyplot <- function(df = NULL, index = NULL, group = NULL, y = NULL, id = NULL,
                        bin = "label", freq = "value", trait = "trait", fillx = TRUE) {
  #* ***** `general calculated values`
  # Data is "long" when the trait column is present; otherwise treat as wide.
  if (!is.null(trait) && trait %in% colnames(df)) {
    mode <- "long" # if there is a trait column then use long options,
  } else {
    mode <- "wide"
  } # else use wide options
  # Standardize either input format into bin/freq plus grouping columns.
  sub <- .joyPlotFormatData(mode, df, index, trait, bin, freq, group, y, id)
  # With no grouping variable, fall back to a single constant dummy group so
  # the faceting/grouping code below still works.
  if (is.null(group)) {
    group <- "dummy"
    df$dummy <- "dummy"
    sub$dummy <- "dummy"
  }
  # Build the facet layer (and possibly a `y` column) from group/y options.
  joyPlotFacetHelperResult <- .joyPlotFacetHelper(y, group, sub)
  facet_layer <- joyPlotFacetHelperResult[["facet"]]
  sub <- joyPlotFacetHelperResult[["data"]]
  # Combined grouping factor across the y variable and group column(s).
  sub$grouping <- interaction(sub[, c(y, group)], drop = TRUE)
  #* `if ID is null then aggregate, else draw with ID`
  if (is.null(id)) {
    # Average frequencies over replicates within each bin/grouping cell.
    sub <- stats::aggregate(freq ~ ., data = sub, FUN = mean, na.rm = TRUE)
    gg <- ggplot2::ggplot(sub)
  } else {
    # One outline per replicate; note ggridges gradient fill ignores alpha,
    # so transparency only shows on the outer lines.
    # NOTE(review): the aes below references `y` and `grouping` columns;
    # `y` appears to exist only when the y argument is supplied -- confirm.
    sub$id <- sub[[id]]
    gg <- ggplot2::ggplot(sub, ggplot2::aes(alpha = 0.5, group = interaction(id, y, grouping)))
  }
  # Ridge layer: gradient-filled by x position when fillx, otherwise a flat
  # per-group viridis fill/color.
  ggridgeLayer <- if (fillx) {
    x <- NULL # to make R CMD check happy with stat(x)
    list(
      suppressMessages(ggridges::geom_density_ridges_gradient(
        ggplot2::aes(
          x = .data$bin, y = .data$y,
          height = .data$freq, fill = ggplot2::after_stat(x)
        ),
        show.legend = FALSE, stat = "identity", rel_min_height = 0.001
      )),
      ggplot2::scale_fill_viridis_c(
        option = "plasma"
      )
    )
  } else {
    list(
      suppressMessages(ggridges::geom_density_ridges2(
        ggplot2::aes(
          x = .data$bin, y = .data$y,
          height = .data$freq, fill = .data[[group]], color = .data[[group]]
        ),
        show.legend = FALSE, stat = "identity"
      )),
      ggplot2::scale_color_viridis_d(option = "viridis"),
      ggplot2::scale_fill_viridis_d(option = "viridis")
    )
  }
  # Assemble final plot: facets, ridges, axis formatting, and pcvr theme.
  p <- gg +
    facet_layer +
    ggridgeLayer +
    ggplot2::scale_x_continuous(n.breaks = 5, labels = ~ round(., 1)) +
    ggplot2::labs(x = index, y = c(y, group)[1]) +
    pcv_theme() +
    ggplot2::theme(legend.position = "none")
  return(p)
}
|
| 129 | ||
| 130 | ||
| 131 |
#' *********************************************************************************************** |
|
| 132 |
#' *************** `format data` **************************************** |
|
| 133 |
#' *********************************************************************************************** |
|
| 134 |
#' |
|
| 135 |
#' @description |
|
| 136 |
#' Internal function for formatting MV trait data |
|
| 137 |
#' |
|
| 138 |
#' @keywords internal |
|
| 139 |
#' @noRd |
|
| 140 | ||
| 141 |
.joyPlotFacetHelper <- function(y, group, sub) {
  #* Build the faceting layer and the "y" axis column for pcv.joyplot.
  #* When y is supplied it becomes the axis and group() drives the facets;
  #* when y is absent the first grouping variable takes its place and one
  #* fewer facet dimension is used.
  if (!is.null(y)) {
    if (length(group) == 1) {
      sub$y <- sub[[y]]
      fct <- ggplot2::facet_grid(as.formula(paste0("~", group[1])))
    } else if (length(group) == 2) {
      sub$y <- sub[[y]]
      fct <- ggplot2::facet_grid(as.formula(paste0(group[1], "~", group[2])))
    }
    # ridgeline y axis must be discrete
    sub$y <- as.character(sub$y)
  } else { # if y is not provided then one less layer of faceting
    if (length(group) == 1) {
      sub$y <- sub[[group]]
      fct <- list()
    } else if (length(group) == 2) {
      sub$y <- sub[[group[1]]]
      fct <- ggplot2::facet_grid(as.formula(paste0("~", group[2])))
    }
  }
  list("data" = sub, "facet" = fct)
}
|
| 164 | ||
| 165 | ||
| 166 |
#' *********************************************************************************************** |
|
| 167 |
#' *************** `format data` **************************************** |
|
| 168 |
#' *********************************************************************************************** |
|
| 169 |
#' |
|
| 170 |
#' @description |
|
| 171 |
#' Internal function for formatting MV trait data |
|
| 172 |
#' |
|
| 173 |
#' @keywords internal |
|
| 174 |
#' @noRd |
|
| 175 | ||
| 176 |
.joyPlotFormatData <- function(mode, df, index, trait, bin, freq, group, y, id) {
  #* Standardize long or wide multi-value trait data into the column set that
  #* pcv.joyplot expects: the grouping/facet/id columns plus numeric "bin"
  #* (x position) and "freq" (ridge height) columns and the trait column.
  #* if long data then subset rows where trait is correct
  if (mode == "long") {
    if (is.null(index)) {
      sub <- df
    } else {
      # keep only rows for the requested trait
      sub <- df[df[[trait]] == index, ]
    }
    if (length(unique(sub[[trait]])) > 1) {
      warning("More than one trait found, consider an `index` argument")
    }
    sub$bin <- as.numeric(sub[[bin]])
    sub$freq <- as.numeric(sub[[freq]])
    sub <- sub[, c(group, y, id, "bin", "freq", trait)]
  } else if (mode == "wide") { # if wide then get column names that contain index string
    #* subset data to only have index columns
    #* turn the data longer
    sub_wide <- data.table::as.data.table(
      df[, which(colnames(df) %in% c(group, y, id) | grepl(index, colnames(df)))]
    )
    sub <- as.data.frame(data.table::melt(sub_wide,
      id.vars = c(group, y, id),
      variable.name = trait, value.name = freq
    ))
    # Strip the index prefix from the melted column names, then pull the
    # trailing numeric part out as the bin position.
    # (note: `sub(...)` on the right is base::sub, distinct from the `sub`
    # data frame being assigned to)
    sub[[bin]] <- sub(index, "", sub[[trait]])
    # NOTE(review): regmatches() silently drops elements with no numeric
    # match, which would recycle a shorter vector into sub$bin here --
    # assumes every melted column name contains a digit; confirm for
    # non-numeric wide column names.
    sub$bin <- as.numeric(regmatches(sub[[bin]], regexpr("[0-9].*", sub[[bin]])))
    sub[[trait]] <- index
    sub$freq <- as.numeric(sub[[freq]])
    sub <- sub[, c(group, y, id, "bin", "freq", trait)]
  }
  # If mode is neither "long" nor "wide", `sub` is never assigned and this
  # errors with "object 'sub' not found".
  return(sub)
}
| 1 |
#' @description |
|
| 2 |
#' Internal function for calculating the beta distribution of the success rate of a bernoulli |
|
| 3 |
#' distribution represented by single value traits. |
|
| 4 |
#' @param s1 A vector of numerics drawn from a uniform distribution. |
|
| 5 |
#' @examples |
|
| 6 |
#' out <- .conj_bernoulli_sv( |
|
| 7 |
#' s1 = sample(c(TRUE, FALSE), 10, prob = c(0.3, 0.7), replace = TRUE), |
|
| 8 |
#' cred.int.level = 0.95, |
|
| 9 |
#' plot = TRUE |
|
| 10 |
#' ) |
|
| 11 |
#' lapply(out, head) |
|
| 12 |
#' @keywords internal |
|
| 13 |
#' @noRd |
|
| 14 |
.conj_bernoulli_sv <- function(s1 = NULL, priors = NULL,
                               plot = FALSE, support = NULL, cred.int.level = NULL,
                               calculatingSupport = FALSE) {
  #* Conjugate Beta update for the success rate of Bernoulli (logical) data.
  #* Default to a Jeffreys Beta(0.5, 0.5) prior when none is provided.
  if (is.null(priors)) {
    priors <- list(a = 0.5, b = 0.5)
  }
  if (!is.logical(s1)) {
    stop("Bernoulli data must be supplied as a logical vector")
  }
  #* Conjugate update: successes add to alpha, failures add to beta.
  post_a <- priors$a[1] + sum(s1)
  post_b <- priors$b[1] + sum(!s1)
  #* When only the support is being calculated, return the (0, 1) open range.
  if (is.null(support) && calculatingSupport) {
    return(c(0.0001, 0.9999))
  }
  #* Posterior draws and normalized density over the supplied support.
  draws <- rbeta(10000, post_a, post_b)
  dens_vals <- dbeta(support, post_a, post_b)
  pdf_vals <- dens_vals / sum(dens_vals)
  #* Equal-tailed credible interval and highest density estimate.
  tail_prob <- (1 - cred.int.level) / 2
  cred_bounds <- qbeta(c(tail_prob, 1 - tail_prob), post_a, post_b)
  hde <- .betaHDE(post_a, post_b)
  out <- list(
    posteriorDraws = draws,
    pdf = pdf_vals,
    summary = data.frame(HDE_1 = hde, HDI_1_low = cred_bounds[1], HDI_1_high = cred_bounds[2]),
    posterior = list("a" = post_a, "b" = post_b)
  )
  #* Keep the density over the support for downstream plotting.
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support,
      "prob" = pdf_vals,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(out)
}
| 1 |
#' Visualizing igraph networks |
|
| 2 |
#' |
|
| 3 |
#' @description Easy igraph visualization with pcv.net output |
|
| 4 |
#' |
|
| 5 |
#' |
|
| 6 |
#' @param net Network object similar to that returned from pcv.net, having dataframes named "edges" |
|
| 7 |
#' and "nodes" |
|
| 8 |
#' @param fill Variable name(s) from the nodes data to be used to color points. By default "strength" |
|
| 9 |
#' is used. |
|
| 10 |
#' @param shape Optional discrete variable name(s) from the nodes data to be used to change the shape |
|
| 11 |
#' of points. If this variable is numeric it will be coerced to character. |
|
| 12 |
#' @param size Size of points, defaults to 3. |
|
| 13 |
#' @param edgeWeight Edge dataframe column to weight connections between nodes. Defaults to "emd" |
|
| 14 |
#' for compatability with \code{pcv.emd}.
|
|
| 15 |
#' @param edgeFilter How should edges be filtered? This can be either a numeric (0.5) |
|
| 16 |
#' in which case it is taken as a filter where only edges with values greater than or equal to |
|
| 17 |
#' that number are kept or a character string ("0.5") in which case the strongest
|
|
| 18 |
#' X percentage of edges are kept. This defaults to NULL which does no filtering, |
|
| 19 |
#' although that should not be considered the best standard behaviour. See details. |
|
| 20 |
#' @import ggplot2 |
|
| 21 |
#' @importFrom stats quantile |
|
| 22 |
#' |
|
| 23 |
#' @keywords emd network |
|
| 24 |
#' @examples |
|
| 25 |
#' |
|
| 26 |
#' library(extraDistr) |
|
| 27 |
#' dists <- list( |
|
| 28 |
#' rmixnorm = list(mean = c(70, 150), sd = c(15, 5), alpha = c(0.3, 0.7)), |
|
| 29 |
#' rnorm = list(mean = 90, sd = 3) |
|
| 30 |
#' ) |
|
| 31 |
#' x <- mvSim( |
|
| 32 |
#' dists = dists, n_samples = 5, counts = 1000, |
|
| 33 |
#' min_bin = 1, max_bin = 180, wide = TRUE |
|
| 34 |
#' ) |
|
| 35 |
#' emd_df <- pcv.emd(x, |
|
| 36 |
#' cols = "sim", reorder = c("group"), mat = FALSE,
|
|
| 37 |
#' plot = FALSE, parallel = 1 |
|
| 38 |
#' ) |
|
| 39 |
#' net <- pcv.net(emd_df, meta = "group") |
|
| 40 |
#' net.plot(net) |
|
| 41 |
#' net.plot(net, edgeFilter = "0.25") |
|
| 42 |
#' net.plot(net, |
|
| 43 |
#' edgeFilter = 0.25, fill = c("degree", "group"),
|
|
| 44 |
#' shape = c("degree", "group")
|
|
| 45 |
#' ) |
|
| 46 |
#' net.plot(net, |
|
| 47 |
#' edgeFilter = 0.25, fill = c("degree", "group"),
|
|
| 48 |
#' shape = c("degree")
|
|
| 49 |
#' ) |
|
| 50 |
#' |
|
| 51 |
#' @return Returns a ggplot of a network. |
|
| 52 |
#' |
|
| 53 |
#' @export |
|
| 54 |
#' |
|
| 55 | ||
| 56 |
net.plot <- function(net, fill = "strength", shape = NULL, size = 3, edgeWeight = "emd",
                     edgeFilter = NULL) {
  nodes <- net[["nodes"]]
  edges <- net[["edges"]]
  #* With no fill, map a constant placeholder column so aes() below is valid;
  #* the matching guide is suppressed at the end.
  if (is.null(fill)) {
    fill <- "NOFILL"
    nodes$NOFILL <- "a"
  }
  #* Multiple fill variables collapse into one interaction column.
  if (length(fill) > 1) {
    nodes$FILL <- interaction(nodes[, fill])
    fill <- "FILL"
  }
  if (is.null(shape)) {
    shape <- "NOSHAPE"
    nodes$NOSHAPE <- "a"
  }
  if (length(shape) > 1) {
    nodes$SHAPE <- interaction(nodes[, shape])
    shape <- "SHAPE"
  }
  #* Shape must be discrete; coerce numeric shape variables to character.
  if (is.numeric(nodes[[shape]])) {
    nodes[[shape]] <- as.character(nodes[[shape]])
  }
  #* Edge filtering: a character edgeFilter is a quantile (keep the strongest
  #* 1 - p of edges), a numeric one is an absolute cutoff on edgeWeight.
  #* Nodes left with no edges are dropped.
  if (!is.null(edgeFilter)) {
    if (is.character(edgeFilter)) {
      cutoff <- quantile(edges[[edgeWeight]], probs = as.numeric(edgeFilter))
      edges <- edges[edges[[edgeWeight]] >= as.numeric(cutoff), ]
    } else if (is.numeric(edgeFilter)) {
      edges <- edges[edges[[edgeWeight]] >= edgeFilter, ]
    }
    nodes <- nodes[nodes$index %in% c(edges$from, edges$to), ]
  }

  p <- ggplot2::ggplot(nodes) +
    ggplot2::geom_segment(data = edges, ggplot2::aes(
      x = .data$from.x, xend = .data$to.x, y = .data$from.y, yend = .data$to.y,
      linewidth = .data[[edgeWeight]]
    ), colour = "black", alpha = 0.1) +
    ggplot2::geom_point(data = nodes, size = size, ggplot2::aes(
      x = .data$V1, y = .data$V2,
      fill = .data[[fill]], color = .data[[fill]],
      shape = .data[[shape]]
    ), alpha = 1, show.legend = TRUE) +
    ggplot2::scale_linewidth(range = c(0.1, 1.5)) +
    #* note that scaling shape should work, but there is a documented ggplot2
    #* bug where this messes up the legend, so
    #* until that is fixed I will not specify fillable shapes.
    ggplot2::guides(linewidth = "none", shape = ggplot2::guide_legend(nrow = 1), fill = "none") +
    ggplot2::theme_void() +
    ggplot2::theme(legend.position = "bottom")
  #* Bug fix: this previously read `length(fill == 1)`, i.e. the length of a
  #* comparison vector (always >= 1 and truthy), not a check that `fill` has
  #* exactly one element. Now mirrors the shape check below.
  if (length(fill) == 1 && fill == "NOFILL") {
    p <- p + ggplot2::guides(color = "none")
  }
  if (length(shape) == 1 && shape == "NOSHAPE") {
    p <- p + ggplot2::guides(shape = "none")
  }
  return(p)
}
| 1 |
#' Read in lemnatech watering data from metadata.json files |
|
| 2 |
#' |
|
| 3 |
#' @param file Path to a json file of lemnatech metadata. |
|
| 4 |
#' @param envKey Character string representing the json key for environment data. |
|
| 5 |
#' By default this is set to "environment". |
|
| 6 |
#' Currently there are no situations where this makes sense to change. |
|
| 7 |
#' @keywords watering json |
|
| 8 |
#' @import jsonlite |
|
| 9 |
#' @importFrom utils type.convert |
|
| 10 |
#' @return A data frame containing the bellwether watering data |
|
| 11 |
#' @examples |
|
| 12 |
#' |
|
| 13 |
#' w <- bw.water("https://raw.githubusercontent.com/joshqsumner/pcvrTestData/main/metadata.json")
|
|
| 14 |
#' |
|
| 15 |
#' @export |
|
| 16 | ||
| 17 |
bw.water <- function(file = NULL, envKey = "environment") {
  meta <- jsonlite::fromJSON(txt = file)
  #* Each snapshot's environment record becomes one row; the json key for a
  #* snapshot is kept in the "snapshot" column.
  env <- as.data.frame(do.call(rbind, meta[[envKey]]))
  env$snapshot <- rownames(env)
  rownames(env) <- NULL
  #* Flatten list-columns produced by rbind-ing json records, then let
  #* type.convert() restore numeric/character column types.
  env <- as.data.frame(apply(env, 2, as.character))
  env <- type.convert(env, as.is = TRUE)
  if ("timestamp" %in% colnames(env)) {
    tryCatch(
      {
        env$timestamp <- as.POSIXct(env$timestamp, tryFormats = c(
          "%Y-%m-%d %H:%M:%OS",
          "%Y-%m-%dT%H:%M:%OS",
          "%Y/%m/%d %H:%M:%OS",
          "%Y-%m-%d %H:%M",
          "%Y/%m/%d %H:%M",
          "%Y-%m-%d",
          "%Y/%m/%d"
        ), tz = "UTC")
        begin <- min(env$timestamp, na.rm = TRUE)
        message(paste0(
          "Using the first watering time, ", begin,
          ", as beginning of experiment to assign DAS"
        ))
        #* Bug fix: `(timestamp - begin) / 24 / 60 / 60` assumed the difftime
        #* was in seconds, but POSIXct subtraction picks units automatically
        #* (secs/mins/hours/days depending on the span). Requesting days
        #* explicitly is correct for any span.
        env$DAS <- as.numeric(difftime(env$timestamp, begin, units = "days"))
      },
      #* Previously these handlers were empty, silently discarding parse
      #* failures; now they surface why DAS could not be assigned while still
      #* returning the un-augmented data.
      error = function(err) {
        warning("Timestamps could not be parsed, so DAS was not assigned: ",
                conditionMessage(err), call. = FALSE)
      },
      warning = function(warn) {
        warning("Timestamp parsing raised a warning, so DAS was not assigned: ",
                conditionMessage(warn), call. = FALSE)
      }
    )
  }
  return(env)
}
| 1 |
#' Calculate relative tolerance of some phenotype(s) relative to control |
|
| 2 |
#' |
|
| 3 |
#' @description Often in bellwether experiments we are curious about the effect of some |
|
| 4 |
#' treatment vs control. For certain routes in analysing the data this requires considering |
|
| 5 |
#' phenotypes as relative differences compared to a control. Note that the \code{conjugate}
|
|
| 6 |
#' function can also be useful in considering the relative tolerance to stress between groups and that |
|
| 7 |
#' growth models are another suggested way to test relative tolerance questions. |
|
| 8 |
#' |
|
| 9 |
#' @param df Dataframe to use, this can be in long or wide format. |
|
| 10 |
#' @param phenotypes A character vector of column names for the phenotypes |
|
| 11 |
#' that should be compared against control. |
|
| 12 |
#' @param grouping A character vector of column names that identify groups in the data. |
|
| 13 |
#' These groups will be calibrated separately, |
|
| 14 |
#' with the exception of the group that identifies a control within the greater hierarchy. |
|
| 15 |
#' Note that for levels of grouping where the control group does not exist the output will be NA. |
|
| 16 |
#' @param control A column name for the variable to be used to select the control observations. |
|
| 17 |
#' If left NULL (the default) then this will be taken as the first string in the group argument. |
|
| 18 |
#' @param controlGroup The level of the control variable to compare groups against. |
|
| 19 |
#' @param traitCol Column with phenotype names, defaults to "trait". |
|
| 20 |
#' This should generally not need to be changed from the default. |
|
| 21 |
#' If this and valueCol are present in colnames(df) then the data |
|
| 22 |
#' is assumed to be in long format. |
|
| 23 |
#' @param valueCol Column with phenotype values, defaults to "value". |
|
| 24 |
#' This should generally not need to be changed from the default. |
|
| 25 |
#' @return A dataframe with relative tolerance columns added. |
|
| 26 |
#' @importFrom stats sd setNames |
|
| 27 |
#' @keywords single-value-trait |
|
| 28 |
#' @examples |
|
| 29 |
#' \donttest{
|
|
| 30 |
#' sv <- read.pcv( |
|
| 31 |
#' "https://raw.githubusercontent.com/joshqsumner/pcvrTestData/main/pcv4-single-value-traits.csv", |
|
| 32 |
#' reader = "fread" |
|
| 33 |
#' ) |
|
| 34 |
#' sv$genotype <- substr(sv$barcode, 3, 5) |
|
| 35 |
#' sv$genotype <- ifelse(sv$genotype == "002", "B73", |
|
| 36 |
#' ifelse(sv$genotype == "003", "W605S", |
|
| 37 |
#' ifelse(sv$genotype == "004", "MM", "Mo17") |
|
| 38 |
#' ) |
|
| 39 |
#' ) |
|
| 40 |
#' sv$fertilizer <- substr(sv$barcode, 8, 8) |
|
| 41 |
#' sv$fertilizer <- ifelse(sv$fertilizer == "A", "100", |
|
| 42 |
#' ifelse(sv$fertilizer == "B", "50", "0") |
|
| 43 |
#' ) |
|
| 44 |
#' |
|
| 45 |
#' sv <- bw.time(sv, |
|
| 46 |
#' plantingDelay = 0, phenotype = "area_pixels", |
|
| 47 |
#' cutoff = 10, timeCol = "timestamp", group = c("barcode", "rotation"), plot = FALSE
|
|
| 48 |
#' ) |
|
| 49 |
#' phenotypes <- colnames(sv)[19:35] |
|
| 50 |
#' phenoForm <- paste0("cbind(", paste0(phenotypes, collapse = ", "), ")")
|
|
| 51 |
#' groupForm <- "DAS+DAP+barcode+genotype+fertilizer" |
|
| 52 |
#' form <- as.formula(paste0(phenoForm, "~", groupForm)) |
|
| 53 |
#' sv <- aggregate(form, data = sv, mean, na.rm = TRUE) |
|
| 54 |
#' sv <- bw.outliers(sv, |
|
| 55 |
#' phenotype = "area_pixels", |
|
| 56 |
#' group = c("DAS", "genotype", "fertilizer"),
|
|
| 57 |
#' plotgroup = c("barcode")
|
|
| 58 |
#' )$data |
|
| 59 |
#' |
|
| 60 |
#' pixels_per_cmsq <- 42.5^2 # pixel per cm^2 |
|
| 61 |
#' sv$area_cm2 <- sv$area_pixels / pixels_per_cmsq |
|
| 62 |
#' sv$height_cm <- sv$height_pixels / 42.5 |
|
| 63 |
#' |
|
| 64 |
#' df <- sv |
|
| 65 |
#' phenotypes <- c("area_cm2", "height_cm")
|
|
| 66 |
#' grouping <- c("fertilizer", "genotype", "DAS")
|
|
| 67 |
#' controlGroup <- "100" |
|
| 68 |
#' control <- "fertilizer" |
|
| 69 |
#' |
|
| 70 |
#' rt <- relativeTolerance(df, phenotypes, grouping, control, controlGroup) |
|
| 71 |
#' head(rt) |
|
| 72 |
#' sapply(rt, function(c) sum(is.na(c))) |
|
| 73 |
#' } |
|
| 74 |
#' |
|
| 75 |
#' @export |
|
| 76 |
#' |
|
| 77 |
relativeTolerance <- function(df, phenotypes = NULL, grouping = NULL, control = NULL,
                              controlGroup = NULL, traitCol = "trait", valueCol = "value") {
  #* Long data is identified by the presence of both the trait and value columns.
  if (all(c(traitCol, valueCol) %in% colnames(df))) {
    wide <- FALSE
  } else {
    wide <- TRUE
  }
  #* Fill defaults so control and grouping can each be derived from the other.
  if (is.null(grouping)) {
    grouping <- control
  }
  if (is.null(control)) {
    control <- grouping[1]
  }
  if (is.null(controlGroup)) {
    controlGroup <- unique(df[[control]])[1]
  }

  #* The control variable is excluded from the split so that each split
  #* contains both control and treatment observations to compare.
  if (control %in% grouping) {
    group_no_control <- grouping[grouping != control]
  } else {
    group_no_control <- grouping
  }
  group_no_control_factor <- interaction(df[, group_no_control])
  datsp <- split(x = df, f = group_no_control_factor)

  #* Error propagation for a ratio Z = x/y of two (approx.) normal quantities:
  #* x ~ N(mu_1, sd_1), y ~ N(mu_2, sd_2)
  #* mu_z = mu_1/mu_2
  #* sd_z = sqrt( ((sd_1 / mu_1)^2) + ((sd_2/mu_2)^2) ) * mu_1/mu_2
  #* NOTE(review): the "se" quantities below are sd / n, not sd / sqrt(n);
  #* kept as-is for backwards compatibility but worth confirming the intent.

  #* `Wide`
  if (wide) {
    d2 <- do.call(rbind, lapply(datsp, function(d) {
      d_res <- do.call(rbind, lapply(phenotypes, function(pheno) {
        #* control group parameters for this phenotype
        ctrl_mean <- mean(d[d[[control]] == controlGroup, pheno])
        ctrl_se <- sd(d[d[[control]] == controlGroup, pheno]) /
          length(d[d[[control]] == controlGroup, pheno])
        pheno_res <- do.call(rbind, lapply(setdiff(unique(d[[control]]), controlGroup), function(cg) {
          #* experimental group parameters
          mu_eg <- mean(d[d[[control]] == cg, pheno])
          se_eg <- sd(d[d[[control]] == cg, pheno]) / length(d[d[[control]] == cg, pheno])
          #* relative center and propagated error
          mu_rel <- mu_eg / ctrl_mean
          se_rel <- sqrt(((se_eg / mu_eg)^2) + ((ctrl_se / ctrl_mean)^2)) * mu_rel
          setNames(
            data.frame(
              cg, d[1, group_no_control], pheno, mu_rel, se_rel,
              mu_eg, se_eg, ctrl_mean, ctrl_se
            ),
            c(
              control, group_no_control, "phenotype", "mu_rel", "se_rel",
              "mu_trt", "se_trt", "mu_control", "se_control"
            )
          )
        }))
        pheno_res
      }))
      d_res
    }))
    rownames(d2) <- NULL
  } else { #* `Long`
    d2 <- do.call(rbind, lapply(datsp, function(d) {
      d_res <- do.call(rbind, lapply(phenotypes, function(pheno) {
        #* control group parameters for this phenotype
        ctrl_mean <- mean(d[d[[control]] == controlGroup & d[[traitCol]] == pheno, valueCol])
        ctrl_se <- sd(d[d[[control]] == controlGroup & d[[traitCol]] == pheno, valueCol]) /
          length(d[d[[control]] == controlGroup & d[[traitCol]] == pheno, valueCol])
        pheno_res <- do.call(rbind, lapply(setdiff(unique(d[[control]]), controlGroup), function(cg) {
          #* experimental group parameters
          mu_eg <- mean(d[d[[control]] == cg & d[[traitCol]] == pheno, valueCol])
          #* Bug fix: the sd here previously indexed a wide-format column
          #* (d[d[[control]] == cg, pheno]), which does not exist in long
          #* data; it now subsets the value column by trait like every other
          #* statistic in this branch.
          se_eg <- sd(d[d[[control]] == cg & d[[traitCol]] == pheno, valueCol]) /
            length(d[d[[control]] == cg & d[[traitCol]] == pheno, valueCol])
          #* relative center and propagated error
          mu_rel <- mu_eg / ctrl_mean
          se_rel <- sqrt(((se_eg / mu_eg)^2) + ((ctrl_se / ctrl_mean)^2)) * mu_rel
          setNames(
            data.frame(
              cg, d[1, group_no_control], pheno, mu_rel, se_rel,
              mu_eg, se_eg, ctrl_mean, ctrl_se
            ),
            c(
              control, group_no_control, "phenotype", "mu_rel", "se_rel",
              "mu_trt", "se_trt", "mu_control", "se_control"
            )
          )
        }))
        pheno_res
      }))
      d_res
    }))
    rownames(d2) <- NULL
  }
  return(d2)
}
| 1 |
#' Function to visualize \code{survival::survreg} models fit by \code{fitGrowth}.
|
|
| 2 |
#' |
|
| 3 |
#' Models fit using \link{growthSS} inputs by \link{fitGrowth}
|
|
| 4 |
#' (and similar models made through other means) can be visualized easily using this function. |
|
| 5 |
#' This will generally be called by \code{growthPlot}.
|
|
| 6 |
#' |
|
| 7 |
#' @param fit A model fit returned by \code{fitGrowth} with type="nls".
|
|
| 8 |
#' @param form A formula similar to that in \code{growthSS} inputs (or the \code{pcvrForm}
|
|
| 9 |
#' part of the output) specifying the outcome, |
|
| 10 |
#' predictor, and grouping structure of the data as \code{outcome ~ predictor|individual/group}.
|
|
| 11 |
#' If the individual and group are specified then the observed growth lines are plotted. |
|
| 12 |
#' @param groups An optional set of groups to keep in the plot. |
|
| 13 |
#' Defaults to NULL in which case all groups in the model are plotted. |
|
| 14 |
#' @param df A dataframe to use in plotting observed growth curves on top of the model. |
|
| 15 |
#' This must be supplied for nls models. |
|
| 16 |
#' @param timeRange Ignored, included for compatibility with other plotting functions. |
|
| 17 |
#' @param facetGroups logical, should groups be separated in facets? Defaults to TRUE. |
|
| 18 |
#' @param groupFill logical, should groups have different colors? Defaults to FALSE. |
|
| 19 |
#' If TRUE then viridis colormaps are used in the order of virMaps |
|
| 20 |
#' @param virMaps order of viridis maps to use. Will be recycled to necessary length. Defaults to |
|
| 21 |
#' "plasma", but will generally be informed by growthPlot's default. |
|
| 22 |
#' @keywords survival |
|
| 23 |
#' @importFrom methods is |
|
| 24 |
#' @import ggplot2 |
|
| 25 |
#' @importFrom stats predict |
|
| 26 |
#' @examples |
|
| 27 |
#' |
|
| 28 |
#' |
|
| 29 |
#' df <- growthSim("logistic",
|
|
| 30 |
#' n = 20, t = 25, |
|
| 31 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 32 |
#' ) |
|
| 33 |
#' ss <- growthSS( |
|
| 34 |
#' model = "survival weibull", form = y > 100 ~ time | id / group, |
|
| 35 |
#' df = df, type = "survreg" |
|
| 36 |
#' ) |
|
| 37 |
#' fit <- fitGrowth(ss) |
|
| 38 |
#' survregPlot(fit, form = ss$pcvrForm, df = ss$df) |
|
| 39 |
#' survregPlot(fit, form = ss$pcvrForm, df = ss$df, groups = "a") |
|
| 40 |
#' survregPlot(fit, |
|
| 41 |
#' form = ss$pcvrForm, df = ss$df, facetGroups = FALSE, |
|
| 42 |
#' groupFill = TRUE, virMaps = c("plasma", "mako")
|
|
| 43 |
#' ) |
|
| 44 |
#' |
|
| 45 |
#' @return Returns a ggplot showing an survival model's survival function. |
|
| 46 |
#' |
|
| 47 |
#' @export |
|
| 48 | ||
| 49 |
#* `survregPlot`: plot a survreg model's survival function with 1 SE and 2 SE
#* ribbons per group and the empirical (Kaplan-Meier style) survival line
#* overlaid in black. Note `timeRange` is accepted for interface compatibility
#* but is not used in this function.
survregPlot <- function(fit, form, groups = NULL, df = NULL, timeRange = NULL, facetGroups = TRUE,
                        groupFill = FALSE, virMaps = c("plasma")) {
  #* `parse formula` for the x (time) variable, grouping variable, and data
  parsed_form <- .parsePcvrForm(form, df)
  x <- parsed_form$x
  group <- parsed_form$group
  df <- parsed_form$data
  #* `filter by groups if groups != NULL`, otherwise plot every group in the data
  if (!is.null(groups)) {
    df <- df[df[[group]] %in% groups, ]
  } else {
    groups <- unique(df[[group]])
  }
  #* `generate predictions`: quantiles of the event-time distribution per group
  pct <- seq(0.01, 0.99, 0.01)
  preds <- predict(fit,
    newdata = data.frame("group" = groups),
    type = "quantile", p = pct, se.fit = TRUE
  )
  #* coerce fit and se.fit to groups-by-quantiles matrices
  #* (predict returns a vector when there is a single group)
  preds <- lapply(preds, function(d) {
    matrix(d, nrow = length(groups), ncol = length(pct))
  })
  #* reshape to long format with one row per (survival percent, group)
  pred_df <- stats::setNames(as.data.frame(t(preds$fit)), c(paste0("est_", groups)))
  pred_df <- cbind(pred_df, stats::setNames(as.data.frame(t(preds$se.fit)), c(paste0("se_", groups))))
  pred_df$pct <- 1 - pct # survival percent is the complement of the quantile
  dt <- data.table::as.data.table(pred_df)
  dt_long <- data.table::melt(dt, id.vars = "pct")
  dt_long$group <- gsub(".*_", "", dt_long$variable)
  dt_long$variable <- gsub("_.*", "", dt_long$variable)
  preds <- data.frame(data.table::dcast(dt_long, pct + group ~ variable, value.var = "value"))
  #* `facetGroups`
  if (facetGroups) {
    facet_layer <- ggplot2::facet_wrap(stats::as.formula(paste0("~", group)))
  } else {
    facet_layer <- NULL
  }
  #* `groupFill`: the two branches previously duplicated everything except the
  #* palette sequence, so choose the palettes first and build the colors once.
  palettes <- if (groupFill) {
    virMaps
  } else {
    "plasma"
  }
  virVals <- lapply(rep(palettes, length.out = length(unique(df[[group]]))), function(pal) {
    viridis::viridis(3, begin = 0.1, option = pal)
  })
  names(virVals) <- groups
  color_scale <- ggplot2::scale_color_manual(values = unlist(lapply(virVals, function(pal) pal[3])))
  #* `Make ggplot`: 2 SE ribbons, then 1 SE ribbons, then the estimated line
  p <- ggplot2::ggplot(preds, ggplot2::aes(
    x = .data[["est"]],
    y = .data[["pct"]], group = .data[[group]]
  )) +
    facet_layer +
    lapply(groups, function(grp) {
      ggplot2::geom_ribbon(data = preds[preds[[group]] == grp, ], ggplot2::aes(
        xmin = .data[["est"]] - (2 * .data[["se"]]),
        xmax = .data[["est"]] + (2 * .data[["se"]])
      ), fill = virVals[[grp]][1], alpha = 0.5)
    }) +
    lapply(groups, function(grp) {
      ggplot2::geom_ribbon(data = preds[preds[[group]] == grp, ], ggplot2::aes(
        xmin = .data[["est"]] - (1 * .data[["se"]]),
        xmax = .data[["est"]] + (1 * .data[["se"]])
      ), fill = virVals[[grp]][2], alpha = 0.5)
    }) +
    ggplot2::geom_line(ggplot2::aes(color = .data[[group]]), show.legend = FALSE) +
    color_scale +
    ggplot2::scale_y_continuous(labels = scales::label_percent()) +
    ggplot2::labs(x = x, y = "Survival") +
    pcv_theme()
  #* `Add data as KM line`: empirical survival percent at each whole time step.
  #* NOTE(review): km_df keeps its time column named "time" while the aes below
  #* reads .data[[x]], so this assumes the model's x variable is named "time"
  #* — confirm against .parsePcvrForm usage.
  if (!is.null(df)) {
    km_df <- do.call(rbind, lapply(groups, function(grp) {
      sub <- df[df[[group]] == grp, ]
      do.call(rbind, lapply(seq(0, max(df[[x]]), 1), function(ti) {
        # events observed at or before time ti (0 appended so sum of an
        # empty selection is still defined)
        sum_events <- sum(c(sub[as.numeric(sub[[x]]) <= ti, "event"], 0))
        n_at_risk <- nrow(sub) - sum_events
        surv_pct <- n_at_risk / nrow(sub)
        iter <- data.frame(
          group = grp, time = ti, events = sum_events,
          at_risk = n_at_risk, surv_pct = surv_pct
        )
        colnames(iter)[1] <- group
        iter
      }))
    }))
    p <- p + ggplot2::geom_line(data = km_df, ggplot2::aes(
      x = .data[[x]],
      y = .data[["surv_pct"]],
      group = .data[[group]],
      linetype = .data[[group]]
    ), color = "black", show.legend = FALSE)
  }
  return(p)
}
| 1 |
#' @description |
|
| 2 |
#' Negative binomial via conjugate beta prior GIVEN A KNOWN R |
|
| 3 |
#' conjugacy: |
|
| 4 |
#' if counts ~ nbinom(r, p)
#' where r is the size (number of successful trials), which is fixed and known,
#' and p is the probability of success per trial
|
| 7 |
#' |
|
| 8 |
#' if r is known then p ~ beta(A, B) |
|
| 9 |
#' note that the compendium of conjugate priors seems to have a typo for this relationship, |
|
| 10 |
#' but the wikipedia conjugate prior article is correct |
|
| 11 |
#' \code{A' = A + r*n}
|
|
| 12 |
#' \code{B' = B + sum(X)}
|
|
| 13 |
#' |
|
| 14 |
#' Using MoM: |
|
| 15 |
#' |
|
| 16 |
#' \bar{x} = k(1-p)/p
|
|
| 17 |
#' s^2 = \bar{x}/p
|
|
| 18 |
#' r = \bar{x}^2 / (s^2 - \bar{x})
|
|
| 19 |
#' p = \bar{x}/s^2
|
|
| 20 |
#' |
|
| 21 |
#' @param s1 A vector of numerics drawn from a negative binomial distribution. |
|
| 22 |
#' @examples |
|
| 23 |
#' .conj_negbin_sv( |
|
| 24 |
#' s1 = rnbinom(10, 10, 0.5), |
|
| 25 |
#' priors = NULL, |
|
| 26 |
#' plot = FALSE, |
|
| 27 |
#' cred.int.level = 0.89 |
|
| 28 |
#' ) |
|
| 29 |
#' @keywords internal |
|
| 30 |
#' @noRd |
|
| 31 | ||
| 32 |
#' @description
#' Single-sample conjugate updating for negative binomial counts with a
#' known size parameter r: the per-trial probability p gets a Beta(a, b)
#' prior, updated as a' = a + r * n_obs and b' = b + sum(counts).
.conj_negbin_sv <- function(s1 = NULL, priors = NULL,
                            plot = FALSE, support = NULL, cred.int.level = NULL,
                            calculatingSupport = FALSE) {
  #* `Check samples`: negative binomial data must be non-negative integers
  non_integer <- any(abs(s1 - round(s1)) > .Machine$double.eps^0.5)
  if (non_integer || any(s1 < 0)) {
    stop("Only positive whole numbers can be used in the Negative Binomial distribution")
  }
  #* `make default prior if none provided`
  if (is.null(priors)) {
    priors <- list(r = 10, a = 0.5, b = 0.5) # beta prior on P
    warning(paste0(
      "True value of r for negative binomial distribution has defaulted to 10,",
      " you should add a prior including r parameter."
    ))
  }
  #* `Use conjugate beta prior on probability`
  #* Note that this is very sensitive to the R value being appropriate
  alpha_post <- priors$a[1] + priors$r[1] * length(s1)
  beta_post <- priors$b[1] + sum(s1)
  #* `Define support if it is missing`: probabilities live in (0, 1)
  if (calculatingSupport && is.null(support)) {
    return(c(0.0001, 0.9999))
  }
  #* `calculate density over support`, normalized to a discrete pdf
  density_vals <- dbeta(support, alpha_post, beta_post)
  pdf1 <- density_vals / sum(density_vals)
  #* `calculate highest density interval` via equal-tailed posterior quantiles
  tail_prob <- (1 - cred.int.level) / 2
  hdi1 <- qbeta(c(tail_prob, 1 - tail_prob), alpha_post, beta_post)
  #* `calculate highest density estimate`
  hde1 <- .betaHDE(alpha_post, beta_post)
  #* `save summary and parameters`
  out <- list()
  out$summary <- data.frame(HDE_1 = hde1, HDI_1_low = hdi1[1], HDI_1_high = hdi1[2])
  out$posterior$r <- priors$r[1]
  out$posterior$a <- alpha_post
  out$posterior$b <- beta_post
  #* `Make Posterior Draws`
  out$posteriorDraws <- rbeta(10000, alpha_post, beta_post)
  out$pdf <- pdf1
  #* `keep data for plotting`
  if (plot) {
    out$plot_df <- data.frame(
      "range" = support, "prob" = pdf1,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(out)
}
| 1 |
#' Ease of use growth model helper function. |
|
| 2 |
#' |
|
| 3 |
#' Output from this should be passed to \link{fitGrowth} to fit the specified model.
|
|
| 4 |
#' |
|
| 5 |
#' @param model The name of a model as a character string. |
|
| 6 |
#' Supported options are c("logistic", "gompertz", "weibull", "frechet", "gumbel", "monomolecular",
|
|
| 7 |
#' "exponential", "linear", "power law", "bragg", "lorentz", "beta", |
|
| 8 |
#' "double logistic", "double gompertz", "gam", "int"), with "int" representing an intercept only model |
|
| 9 |
#' which is only used in brms (and is expected to only be used in threshold models or to model |
|
| 10 |
#' homoskedasticity). Note that the dose response curves (bragg, lorentz, and beta) may be difficult |
|
| 11 |
#' to fit using the \code{nlme} backend but should work well using other options.
|
|
| 12 |
#' See \code{\link{growthSim}} for examples of each type of single parameterized growth curve
|
|
| 13 |
#' ("gam" is not supported in \code{growthSim}).
|
|
| 14 |
#' You can also specify decay models by including the "decay" keyword with the model name. Note that |
|
| 15 |
#' using "decay" is only necessary for the brms backend since otherwise the priors are strictly |
|
| 16 |
#' positive. |
|
| 17 |
#' In brms models the entire formula is negated for decay models so that lognormal priors can |
|
| 18 |
#' still be used when at least some coefficients would be negative. |
|
| 19 |
#' Additionally, the "int_" prefix may be added to a model name to specify that an intercept should |
|
| 20 |
#' be included. By default these models are assumed to have intercepts at 0, which is often fine. |
|
| 21 |
#' If you include an intercept in a brms model then you would specify the prior as you would for an |
|
| 22 |
#' "A", "B", or "C" parameter but as "I". By default growthSS will make student T priors for intercept |
|
| 23 |
#' parameters in the same way that it will for estimated changepoints (see below). |
|
| 24 |
#' With type="brms" you can also specify segmented models by combining model names with a plus sign |
|
| 25 |
#' such as "linear + linear". In a segmented model the names for parameters do not follow the normal |
|
| 26 |
#' "A", "B", "C" notation, instead they are named for the type of model, the position in the formula, |
|
| 27 |
#' then for the parameter of that model. There will also be parameters to represent the time when |
|
| 28 |
#' growth switches from one model to another called either "changepointX" or "fixedChangePointX". |
|
| 29 |
#' All "changePointX" terms are estimated as parameters of the model. |
|
| 30 |
#' "fixedChangePointX" parameters are not estimated and are kept as the numeric value given in the |
|
| 31 |
#' priors, this is useful if your experiment has an intervention at a set time which you expect to |
|
| 32 |
#' change the growth process acutely. |
|
| 33 |
#' For the "linear + linear" example this would yield parameters "linear1A", "changePoint1" |
|
| 34 |
#' (or "fixedChangePoint1"), and "linear2A". A "linear + gompertz" model would have |
|
| 35 |
#' "linear1A", "changePoint1", "gompertz2A", "gompertz2B", and "gompertz2C" for parameters. |
|
| 36 |
#' Note that double sigmoid models are not supported as parts of segmented models and gams |
|
| 37 |
#' can currently only be included as the last part of a segmented model. When using a changepoint model |
|
| 38 |
#' it may be worth using segments that are simpler to fit |
|
| 39 |
#' (gompertz instead of EVD options, for instance). Currently "homo" and "int" are treated the same |
|
| 40 |
#' and "spline" and "gam" are interchangeable. Time-to-event models can be specified using the |
|
| 41 |
#' "survival" keyword, see details for an explanation of the changes that entails. |
|
| 42 |
#' Similarly, using the brms backend response distributions (see \code{brms::brmsfamily})
|
|
| 43 |
#' can be specified in the model as "family: model" so that a model |
|
| 44 |
#' of logistic increasing counts may be written as \code{model = "poisson: logistic"}.
|
|
| 45 |
#' @param form A formula describing the model. The left hand side should only be |
|
| 46 |
#' the outcome variable (phenotype), and a cutoff if you are making a survival model (see details). |
|
| 47 |
#' The right hand side needs at least the x variable |
|
| 48 |
#' (typically time). Grouping is also described in this formula using roughly lme4 |
|
| 49 |
#' style syntax,with formulas like \code{y~time|individual/group} to show that predictors
|
|
| 50 |
#' should vary by \code{group} and autocorrelation between \code{individual:group}
|
|
| 51 |
#' interactions should be modeled. Note that autocorrelation is only modeled with the "brms" |
|
| 52 |
#' backend in this way. "nlme" requires random effects and correlations to use the same grouping, |
|
| 53 |
#' so autocorrelation using the "nlme" backend works at the group level, so will slightly underestimate |
|
| 54 |
#' the autocorrelation at the individual level. If group has only one level or is not included then |
|
| 55 |
#' it will be ignored in formulas for growth and variance (this may be the case if |
|
| 56 |
#' you split data before fitting models to be able to run more smaller models each more quickly). |
|
| 57 |
#' Hierarchical models can be specified for the brms backend as |
|
| 58 |
#' \code{y~time+other_covariate|individual/group} in which case the parameters of the main growth model
|
|
| 59 |
#' will themselves be estimated by models as specified in the \code{hierarchy} argument. For instance,
|
|
| 60 |
#' if normally "A" had an intercept for each \code{group}, now it would be predicted as
|
|
| 61 |
#' \code{A ~ AI + AA * covariate} where AI and AA now have an intercept for each \code{group}.
|
|
| 62 |
#' @param sigma Other models for distributional parameters. |
|
| 63 |
#' This argument is only used with "brms" and "nlme" models and is handled differently for each. |
|
| 64 |
#' When type="brms" this can be supplied as a model or as a list of models. |
|
| 65 |
#' It is turned into a formula (or list of formulas) with an entry corresponding to each distributional |
|
| 66 |
#' parameter (after the mean) of the growth model family. |
|
| 67 |
#' If no family was specified (\code{model="logistic"} for instance) then the student T distribution
|
|
| 68 |
#' is used, with additional distributional parameters sigma and nu. |
|
| 69 |
#' To check the naming of distributional parameters in each response family use |
|
| 70 |
#' \code{brms::brmsfamily("family")$dpars}. The supported options are the same as the model options
|
|
| 71 |
#' (including threshold models). |
|
| 72 |
#' For distributional parameters that do not have a formula specified they will be modeled as |
|
| 73 |
#' intercept only (not by group). |
|
| 74 |
#' Parameter names are the same as those in the main model but with the distributional parameter name |
|
| 75 |
#' as a prefix. Additionally, if a linear model is used for sigma then it can be modeled with or without |
|
| 76 |
#' a prior, if a prior is specified ("sigmaA") then a non-linear formula is used and the "sigmaA"
|
|
| 77 |
#' parameter will be included in the output instead of the default "sigma" term. |
|
| 78 |
#' In the rare case that you wish to model the mean and the 3rd distributional parameter but not |
|
| 79 |
#' the 2nd then \code{sigma = list("not_estimated", "model")} would allow for that.
|
|
| 80 |
#' When type ="nlme" the options are more limited to c("none", "power", "exp"), corresponding to using
|
|
| 81 |
#' \code{nlme::varIdent}, \code{nlme::varPower}, or \code{nlme::varExp} respectively where "power"
|
|
| 82 |
#' is the default. |
|
| 83 |
#' @param df A dataframe to use. Must contain all the variables listed in the formula. |
|
| 84 |
#' @param pars Optionally specify which parameters should change by group. Note this is model
|
| 85 |
#' dependent and is not implemented for brms models due to their more flexible hypothesis testing. |
|
| 86 |
#' @param start An optional named list of starting values OR means for prior distributions. |
|
| 87 |
#' If this is not provided then starting values are picked with \code{stats::selfStart}.
|
|
| 88 |
#' When type = "brms" these should be provided and are treated as the means of |
|
| 89 |
#' lognormal priors for all growth model parameters and T_5(mu, 3) priors for changepoint parameters. |
|
| 90 |
#' This is done because the values are strictly positive and the lognormal distribution |
|
| 91 |
#' is easily interpreted. The changepoint priors are T distributions for symmetry, 5 DF |
|
| 92 |
#' having been chosen for heavy but not unmanageable tails. |
|
| 93 |
#' If this argument is not provided then priors are made using brms::get_prior. |
|
| 94 |
#' Those priors are unlikely to be suitable and a different set of priors will need to be made |
|
| 95 |
#' for the model using \code{brms::set_prior} for good convergence. When specifying starting
|
|
| 96 |
#' values/prior means think of this as being similar to the \code{params} argument
|
|
| 97 |
#' in \code{growthSim}. Names should correspond to parameter names from the
|
|
| 98 |
#' \code{model} argument. A numeric vector can also be used, but specifying
|
|
| 99 |
#' names is best practice for clarity. Additionally, due to a limitation in |
|
| 100 |
#' \code{brms} currently lower bounds cannot be set for priors for specific groups.
|
|
| 101 |
#' If priors include multiple groups (\code{start = list(A = c(10,15), ...)}) then
|
|
| 102 |
#' you will see warnings after the model is fit about not having specified a lower |
|
| 103 |
#' bound explicitly. Those warnings can safely be ignored and will be addressed if |
|
| 104 |
#' the necessary features are added to \code{brms}. See details for guidance.
|
|
| 105 |
#' @param type Type of model to fit, options are "brms", "nlrq", "nlme", "nls", and "mgcv". |
|
| 106 |
#' Note that the "mgcv" option only supports "gam" models. |
|
| 107 |
#' Survival models can use the "survreg" model type |
|
| 108 |
#' (this will be called if any non-brms/flexsurv type is given) or the "flexsurv" model type |
|
| 109 |
#' which requires the flexsurv package to be installed. |
|
| 110 |
#' Note that for non-brms models variables in the model will be labeled by the factor level of the |
|
| 111 |
#' group, not necessarily by the group name. |
|
| 112 |
#' This is done for ease of use with different modeling functions, the levels are alphabetically sorted |
|
| 113 |
#' and can be checked using: |
|
| 114 |
#' \code{table(ss$df$group, ss$df$group_numericLabel)}.
|
|
| 115 |
#' @param tau A vector of quantiles to fit for nlrq models. |
|
| 116 |
#' @param hierarchy Optionally a list of model parameters that should themselves be modeled by another
|
| 117 |
#' predictor variable. This is only used with the brms backend. |
|
| 118 |
#' @keywords Bayesian brms nls nlme nlrq mgcv longitudinal |
|
| 119 |
#' |
|
| 120 |
#' @importFrom stats as.formula rgamma |
|
| 121 |
#' |
|
| 122 |
#' @details |
|
| 123 |
#' |
|
| 124 |
#' Default priors are not provided, but these can serve as starting points for each distribution. |
|
| 125 |
#' You are encouraged to use \code{growthSim} to consider what kind
|
|
| 126 |
#' of trendlines result from changes to your prior and for interpretation of each parameter. |
|
| 127 |
#' The \link{plotPrior} function can be used to do prior predictive checks.
|
|
| 128 |
#' You should not look back and forth at your data trying to match your
|
| 129 |
#' observed growth exactly with a prior distribution, |
|
| 130 |
#' rather this should be informed by an understanding of the plants you |
|
| 131 |
#' are using and expectations based on previous research. |
|
| 132 |
#' For the "double" models the parameter interpretation is the same |
|
| 133 |
#' as for their non-double counterparts except that there are A and A2, etc. |
|
| 134 |
#' It is strongly recommended to familiarize yourself with the double sigmoid |
|
| 135 |
#' distributions using growthSim before attempting to model one. Additionally, |
|
| 136 |
#' those distributions are intended for use with long delays in an experiment, |
|
| 137 |
#' think stress recovery experiments, not for minor hiccups in plant growth. |
|
| 138 |
#' |
|
| 139 |
#' \itemize{
|
|
| 140 |
#' \item \bold{Logistic}: \code{list('A' = 130, 'B' = 12, 'C' = 3)}
|
|
| 141 |
#' \item \bold{Gompertz}: \code{list('A' = 130, 'B' = 12, 'C' = 1.25)}
|
|
| 142 |
#' \item \bold{Weibull}: \code{list('A' = 130, 'B' = 2, 'C' = 2)}
|
|
| 143 |
#' \item \bold{Frechet}: \code{list('A' = 130, 'B' = 5, 'C' = 6)}
|
|
| 144 |
#' \item \bold{Gumbel}: \code{list('A' = 130, 'B' = 6, 'C' = 4)}
|
|
| 145 |
#' \item \bold{Double Logistic}: \code{list('A' = 130, 'B' = 12, 'C' = 3,
|
|
| 146 |
#' 'A2' = 200, 'B2' = 25, 'C2' = 1)} |
|
| 147 |
#' \item \bold{Double Gompertz}: \code{list('A' = 130, 'B' = 12, 'C' = 0.25,
|
|
| 148 |
#' 'A2' = 220, 'B2' = 30, 'C2' = 0.1)} |
|
| 149 |
#' \item \bold{Monomolecular}: \code{list('A' = 130, 'B' = 2)}
|
|
| 150 |
#' \item \bold{Exponential}: \code{list('A' = 15, 'B' = 0.1)}
|
|
| 151 |
#' \item \bold{Linear}: \code{list('A' = 1)}
|
|
| 152 |
#' \item \bold{Power Law}: \code{list('A' = 13, 'B' = 2)}
|
|
| 153 |
#' } |
|
| 154 |
#' |
|
| 155 |
#' See details below about parameterization for each model option. |
|
| 156 |
#' \itemize{
|
|
| 157 |
#' \item \bold{Logistic}: `A / (1 + exp( (B-x)/C) )`
|
|
| 158 |
#' Where A is the asymptote, B is the inflection point, C is the growth rate. |
|
| 159 |
#' \item \bold{Gompertz}: `A * exp(-B * exp(-C*x))`
|
|
| 160 |
#' Where A is the asymptote, B is the inflection point, C is the growth rate. |
|
| 161 |
#' \item \bold{Weibull}: `A * (1-exp(-(x/C)^B))`
|
|
| 162 |
#' Where A is the asymptote, B is the weibull shape parameter, C is the weibull scale parameter. |
|
| 163 |
#' \item \bold{Frechet}: `A * exp(-((x-0)/C)^(-B))`
|
|
| 164 |
#' Where A is the asymptote, B is the frechet shape parameter, C is the frechet scale parameter. |
|
| 165 |
#' Note that the location parameter (conventionally m) is 0 in these models for simplicity but is still |
|
| 166 |
#' included in the formula. |
|
| 167 |
#' \item \bold{Gumbel}: `A * exp(-exp(-(x-B)/C))`
|
|
| 168 |
#' Where A is the asymptote, B is the inflection point (location), C is the growth rate (scale). |
|
| 169 |
#' \item \bold{Double Logistic}: `A / (1+exp((B-x)/C)) + ((A2-A) /(1+exp((B2-x)/C2)))`
|
|
| 170 |
#' Where A is the asymptote, B is the inflection point, C is the growth rate, |
|
| 171 |
#' A2 is the second asymptote, B2 is the second inflection point, and C2 is the second |
|
| 172 |
#' growth rate. |
|
| 173 |
#' \item \bold{Double Gompertz}: `A * exp(-B * exp(-C*x)) + ((A2-A) * exp(-B2 * exp(-C2*(x-B))))`
|
|
| 174 |
#' Where A is the asymptote, B is the inflection point, C is the growth rate, |
|
| 175 |
#' A2 is the second asymptote, B2 is the second inflection point, and C2 is the second |
|
| 176 |
#' growth rate. |
|
| 177 |
#' \item \bold{Monomolecular}: `A-A * exp(-B * x)`
|
|
| 178 |
#' Where A is the asymptote and B is the growth rate. |
|
| 179 |
#' \item \bold{Exponential}: `A * exp(B * x)`
|
|
| 180 |
#' Where A is the scale parameter and B is the growth rate. |
|
| 181 |
#' \item \bold{Linear}: `A * x`
|
|
| 182 |
#' Where A is the growth rate. |
|
| 183 |
#' \item \bold{Power Law}: `A * x^(B)`
|
|
| 184 |
#' Where A is the scale parameter and B is the growth rate. |
|
| 185 |
#' \item \bold{Bragg}: `A * exp(-B * (x - C) ^ 2)`
|
|
| 186 |
#' This models minima and maxima as a dose-response curve where A is the max response, |
|
| 187 |
#' B is the "precision" or slope at inflection, and C is the x position of the max response. |
|
| 188 |
#' \item \bold{Lorentz}: `A / (1 + B * (x - C) ^ 2)`
|
|
| 189 |
#' This models minima and maxima as a dose-response curve where A is the max response, |
|
| 190 |
#' B is the "precision" or slope at inflection, and C is the x position of the max response. |
|
| 191 |
#' Generally Bragg is preferred to Lorentz for dose-response curves. |
|
| 192 |
#' \item \bold{Beta}: `A * (((x - D) / (C - D)) * ((E - x) / (E - C)) ^ ((E - C) / (C - D))) ^ B`
|
|
| 193 |
#' This models minima and maxima as a dose-response curve where A is the Maximum Value, |
|
| 194 |
#' B is a shape/concavity exponent similar to the sum of alpha and beta in a Beta distribution, |
|
| 195 |
#' C is the position of maximum value, D is the minimum position where distribution > 0, |
|
| 196 |
#' E is the maximum position where distribution > 0. |
|
| 197 |
#' This is a difficult model to fit but can model non-symmetric dose-response relationships which |
|
| 198 |
#' may sometimes be worth the extra effort. |
|
| 199 |
#' } |
|
| 200 |
#' Note that for these distributions parameters do not exist in a vacuum. |
|
| 201 |
#' Changing one will make the others look different in the resulting data. |
|
| 202 |
#' The \code{growthSim} function can be helpful in familiarizing further with these distributions.
|
|
| 203 |
#' |
|
| 204 |
#' Using the \code{brms} backend the \code{sigma} argument optionally specifies a sub model to account
|
|
| 205 |
#' for heteroskedasticity. |
|
| 206 |
#' Any combination of models (except for decay models) can be specified in the \code{sigma} term.
|
|
| 207 |
#' If you need variance to raise and lower then a gam/spline is the most appropriate option. |
|
| 208 |
#' |
|
| 209 |
#' Using the \code{brms} backend a model with lots of parameters may be difficult to estimate if there
|
|
| 210 |
#' are lots of groups. |
|
| 211 |
#' If you have very many levels of your "group" variable in a complex model then consider fitting models |
|
| 212 |
#' to subsets of the "group" variable and using \link{combineDraws} to make a data.frame for
|
|
| 213 |
#' hypothesis testing. |
|
| 214 |
#' |
|
| 215 |
#' Limits on the Y variable can be specified in the \code{brms} backend. This should generally be
|
|
| 216 |
#' unnecessary and will make the model slower to fit and potentially more difficult to set priors on. |
|
| 217 |
#' If you do have a limited phenotype (besides the normal positive constraint for growth models) |
|
| 218 |
#' then this may be helpful, one situation may be canopy coverage percentage which is naturally bounded |
|
| 219 |
#' at an upper and lower limit. |
|
| 220 |
#' To specify these limits add square brackets to the Y term with upper and lower limits such as |
|
| 221 |
#' \code{"y[0,100] ~ time|id/group"}. Other "Additional response information" such as resp_weights or
|
|
| 222 |
#' standard errors can be specified using the \code{brms} backend, with those options documented fully
|
|
| 223 |
#' in the \code{brms::brmsformula} details.
|
|
| 224 |
#' |
|
| 225 |
#' There are also three supported submodel options for \code{nlme} models, but a \code{varFunc} object
|
|
| 226 |
#' can also be supplied, see \code{?nlme::varClasses}.
|
|
| 227 |
#' |
|
| 228 |
#' \itemize{
|
|
| 229 |
#' \item \bold{none}: \code{varIdent(1|group)}, which models a constant variance separately for each
|
|
| 230 |
#' group. |
|
| 231 |
#' \item \bold{power}: \code{varPower(x|group)}, which models variance as a power of x per group.
|
|
| 232 |
#' \item \bold{exp}: \code{varExp(x|group)}, which models variance as an exponent of x per group.
|
|
| 233 |
#' } |
|
| 234 |
#' |
|
| 235 |
#' Survival models can be fit using the "survival" keyword in the model specification. |
|
| 236 |
#' Using the "brms" backend (type argument) you can specify "weibull" (the default) or "binomial" for |
|
| 237 |
#' the distribution to use in that model so that the final model string would be "survival binomial" or |
|
| 238 |
#' "survival weibull" which is equivalent to "survival". Time to event data is very different than |
|
| 239 |
#' standard phenotype data, so the formula argument should include a cutoff for the Y variable to count |
|
| 240 |
#' as an "event". For example, if you were checking germination using area and wanted to use 50 pixels |
|
| 241 |
#' as a germinated plant your formula would be \code{area > 50 ~ time|id/group}.
|
|
| 242 |
#' Internally the input dataframe will be converted to time-to-event data based on that formula. |
|
| 243 |
#' Alternatively you can make your own time to event data and supply that to growthSS. In that case your |
|
| 244 |
#' data should have columns called "n_events" |
|
| 245 |
#' (number of individuals experiencing the event at this time) and "n_eligible" |
|
| 246 |
#' (number of individuals who had not experienced the event at least up to this time) |
|
| 247 |
#' for the binomial model family OR "event" (binary 1,0 for TRUE, FALSE) for the Weibull model family. |
|
| 248 |
#' Note that since these are linear models using different model families the priors are handled |
|
| 249 |
#' differently. For survival models the default priors are weak regularizing priors (Normal(0,5)) |
|
| 250 |
#' on all parameters. If you wish to specify your own priors you can supply them as brmsprior objects |
|
| 251 |
#' or as a list such as \code{priors = list("group1" = c(0,3), "group2" = c(0,1))} where the order of
|
|
| 252 |
#' values is Mu, Sigma. |
|
| 253 |
#' Any non-brms backend will instead use \code{survival::survreg} to fit the model unless the
|
|
| 254 |
#' "flexsurv" type is specified. |
|
| 255 |
#' Distributions will be passed to \code{survreg} where options are "weibull", "exponential",
|
|
| 256 |
#' "gaussian", "logistic","lognormal" and "loglogistic" if type = "survreg" or to |
|
| 257 |
#' \code{flexsurv::flexsurvreg} if type = "flexsurv" where options are "gengamma", "gengamma.orig",
|
|
| 258 |
#' "genf", "genf.orig", "weibull", "gamma", "exp", "llogis", "lnorm", "gompertz", "exponential", |
|
| 259 |
#' and "lognormal". In \code{flexsurvreg} distributional modeling is supported and additional
|
|
| 260 |
#' formula can be passed as a list to the sigma argument of growthSS in the same way as to the anc |
|
| 261 |
#' argument of \code{flexsurv::flexsurvreg}.
|
|
| 262 |
#' Further additional arguments should be supplied via \code{fitGrowth} if desired.
|
|
| 263 |
#' |
|
| 264 |
#' |
|
| 265 |
#' |
|
| 266 |
#' @return A named list of elements to make it easier to fit non linear growth models with several R |
|
| 267 |
#' packages. |
|
| 268 |
#' |
|
| 269 |
#' For \code{brms} models the output contains:
|
|
| 270 |
#' |
|
| 271 |
#' \code{formula}: A \code{brms::bf} formula specifying the growth model, autocorrelation,
|
|
| 272 |
#' variance submodel, and models for each variable in the growth model. |
|
| 273 |
#' \code{prior}: A brmsprior/data.frame object.
|
|
| 274 |
#' \code{initfun}: A function to randomly initialize chains using a random draw from a gamma
|
|
| 275 |
#' distribution (confines initial values to positive and makes correct number |
|
| 276 |
#' of initial values for chains and groups). |
|
| 277 |
#' \code{df} The data input, with dummy variables added if needed and a column to link groups to their
|
|
| 278 |
#' factor levels. |
|
| 279 |
#' \code{family} The model family, currently this will always be "student".
|
|
| 280 |
#' \code{pcvrForm} The form argument unchanged. This is returned so that
|
|
| 281 |
#' it can be used later on in model visualization. Often it may be a good idea |
|
| 282 |
#' to save the output of this function with the fit model, so having this can |
|
| 283 |
#' be useful later on. |
|
| 284 |
#' |
|
| 285 |
#' For \code{quantreg::nlrq} models the output contains:
|
|
| 286 |
#' |
|
| 287 |
#' \code{formula}: An \code{nls} style formula specifying the growth model with groups if specified.
|
|
| 288 |
#' \code{taus}: The quantiles to be fit
|
|
| 289 |
#' \code{start}: The starting values, typically these will be generated from the growth model and your
|
|
| 290 |
#' data in a similar way as shown in \code{stats::selfStart} models.
|
|
| 291 |
#' \code{df} The input data for the model.
|
|
| 292 |
#' \code{pcvrForm} The form argument unchanged.
|
|
| 293 |
#' |
|
| 294 |
#' For \code{nls} models the output is the same as for \code{quantreg::nlrq} models but without
|
|
| 295 |
#' \code{taus} returned.
|
|
| 296 |
#' |
|
| 297 |
#' For \code{nlme::nlme} models the output contains:
|
|
| 298 |
#' |
|
| 299 |
#' \code{formula}: An list of \code{nlme} style formulas specifying the model, fixed and random effects,
|
|
| 300 |
#' random effect grouping, and variance model (weights). |
|
| 301 |
#' \code{start}: The starting values, typically these will be generated from the growth model and your
|
|
| 302 |
#' data in a similar way as shown in \code{stats::selfStart} models.
|
|
| 303 |
#' \code{df} The input data for the model.
|
|
| 304 |
#' \code{pcvrForm} The form argument unchanged.
|
|
| 305 |
#' |
|
| 306 |
#' For all models the type and model are also returned for simplicity downstream. |
|
| 307 |
#' |
|
| 308 |
#' @examples |
|
| 309 |
#' |
|
| 310 |
#' |
|
| 311 |
#' simdf <- growthSim("logistic",
|
|
| 312 |
#' n = 20, t = 25, |
|
| 313 |
#' params = list("A" = c(200, 160), "B" = c(13, 11), "C" = c(3, 3.5))
|
|
| 314 |
#' ) |
|
| 315 |
#' ss <- growthSS( |
|
| 316 |
#' model = "logistic", form = y ~ time | id / group, |
|
| 317 |
#' sigma = "spline", df = simdf, |
|
| 318 |
#' start = list("A" = 130, "B" = 12, "C" = 3), type = "brms"
|
|
| 319 |
#' ) |
|
| 320 |
#' lapply(ss, class) |
|
| 321 |
#' ss$initfun() |
|
| 322 |
#' # the next step would typically be compiling/fitting the model |
|
| 323 |
#' # here we use very few chains and very few iterations for speed, but more of both is better. |
|
| 324 |
#' \donttest{
|
|
| 325 |
#' fit_test <- fitGrowth(ss, |
|
| 326 |
#' iter = 500, cores = 1, chains = 1, backend = "cmdstanr", |
|
| 327 |
#' control = list(adapt_delta = 0.999, max_treedepth = 20) |
|
| 328 |
#' ) |
|
| 329 |
#' } |
|
| 330 |
#' |
|
| 331 |
#' |
|
| 332 |
#' # formulas and priors will look different if there is only one group in the data |
|
| 333 |
#' |
|
| 334 |
#' ex <- growthSim("linear", n = 20, t = 25, params = list("A" = 2))
|
|
| 335 |
#' ex_ss <- growthSS( |
|
| 336 |
#' model = "linear", form = y ~ time | id / group, sigma = "spline", |
|
| 337 |
#' df = ex, start = list("A" = 1), type = "brms"
|
|
| 338 |
#' ) |
|
| 339 |
#' |
|
| 340 |
#' ex_ss$prior # no coef level grouping for priors |
|
| 341 |
#' ex_ss$formula # intercept only model for A |
|
| 342 |
#' |
|
| 343 |
#' ex2 <- growthSim("linear", n = 20, t = 25, params = list("A" = c(2, 2.5)))
|
|
| 344 |
#' ex2_ss <- growthSS( |
|
| 345 |
#' model = "linear", form = y ~ time | id / group, sigma = "spline", |
|
| 346 |
#' df = ex2, start = list("A" = 1), type = "brms"
|
|
| 347 |
#' ) |
|
| 348 |
#' ex2_ss$prior # has coef level grouping for priors |
|
| 349 |
#' ex2_ss$formula # specifies an A intercept for each group and splines by group for sigma |
|
| 350 |
#' |
|
| 351 |
#' @export |
|
| 352 | ||
| 353 |
#* `growthSS`: dispatch to the backend-specific self-starter helper and
#* return its result tagged with the backend type, model name, and call.
growthSS <- function(model, form, sigma = NULL, df, start = NULL,
                     pars = NULL, type = "brms", tau = 0.5, hierarchy = NULL) {
  # Resolve the (possibly abbreviated) backend name against the supported set.
  type_matched <- match.arg(type, choices = c(
    "brms", "nlrq", "nls",
    "nlme", "mgcv", "survreg",
    "flexsurv"
  ))
  # check for the "survival" keyword in the model string
  surv <- .survModelParser(model)
  survivalBool <- surv$survival
  model <- surv$model
  # check for the "int_" intercept prefix
  int_res <- .intModelHelper(model)
  int <- int_res$int
  model <- int_res$model

  if (survivalBool) {
    if (type_matched == "brms") {
      res <- .brmsSurvSS(model = model, form = form, df = df, priors = start)
      res$type <- type_matched
    } else if (type_matched == "flexsurv") {
      res <- .flexSurvSS(model = model, form = form, df = df, anc = sigma)
      res$type <- "flexsurv"
    } else {
      # any other backend falls through to survival::survreg
      res <- .survSS(model = model, form = form, df = df)
      res$type <- "survreg"
    }
  } else {
    if (type_matched == "brms") {
      res <- .brmSS(
        model = model, form = form, sigma = sigma, df = df, priors = start, int = int,
        hierarchy = hierarchy
      )
    } else if (type_matched %in% c("nlrq", "nls")) {
      # pass the matched name (not the raw argument) so .nlrqSS sees a
      # canonical "nlrq"/"nls" even when an abbreviation was supplied
      res <- .nlrqSS(
        model = model, form = form, tau = tau, df = df, start = start, pars = pars,
        type = type_matched, int = int
      )
    } else if (type_matched == "nlme") {
      if (is.null(sigma)) {
        sigma <- "power" # default variance model for nlme
      }
      res <- .nlmeSS(
        model = model, form = form, sigma = sigma, df = df, pars = pars,
        start = start, int = int
      )
    } else if (type_matched == "mgcv") {
      res <- .mgcvSS(model = model, form = form, df = df)
    }
    # Store the matched backend name so res$type is consistent with the
    # survival branches above even when `type` was abbreviated.
    res$type <- type_matched
  }
  res$model <- model
  res$call <- match.call()
  return(res)
}
| 1 |
#' Function to parse survival model specifications in growthSS
#'
#' @param model the model specified for growthSS
#'
#' @return A list. The first component is logical whether or not the formula is a survival model.
#' Second component is the distribution to use for survival modeling (this is only used if type=="brms")
#'
#' @examples
#'
#' .survModelParser("survival - binomial")
#' .survModelParser("survival")
#' .survModelParser("logistic")
#' .survModelParser("logistic+linear")
#'
#' @keywords internal
#' @noRd

.survModelParser <- function(model) {
  # Distributions recognized for survival models (flexsurv/brms naming).
  distributions <- c(
    "binomial", "gengamma", "gengamma.orig", "genf", "genf.orig",
    "weibull", "gamma", "exp", "llogis", "lnorm", "gompertz",
    "exponential", "lognormal"
  )
  if (grepl("survival", model)) {
    survival <- TRUE
    model <- trimws(gsub("survival", "", model, fixed = TRUE))
    # Strip any leading separator ("-", ":", "|") so specifications such as
    # "survival - binomial" (per the examples above) resolve correctly;
    # previously the residual "- binomial" failed in match.arg().
    model <- trimws(sub("^[-:|]+", "", model))
    if (nchar(model) == 0) {
      model <- "weibull" # default survival distribution
    }
    dist <- match.arg(model, distributions)
    return(list("survival" = survival, "model" = dist))
  } else {
    # Not a survival specification; pass the growth model through unchanged.
    return(list("survival" = FALSE, "model" = model))
  }
}
| 1 |
#' Functions to prepare several families of distributional brms models
#'
#' @param model a model passed from growthSS
#'
#' @return A list of elements to pass brmSS for fitting distributional models
#'
#' @examples
#' .brmFamilyHelper("logistic")
#' .brmFamilyHelper("poisson: logistic")
#' .brmFamilyHelper("von_mises: logistic")
#'
#' @keywords internal
#' @noRd

.brmFamilyHelper <- function(model) {
  # Specifications look like "family: growthModel"; when no family prefix is
  # given, default to a student-t response family.
  spec <- if (grepl("[:]", model)) model else paste0("student:", model)
  # Family name is everything before the first colon, the growth model is
  # everything after the last colon.
  fam <- trimws(gsub(":.*", "", spec))
  growth_rhs <- trimws(gsub(".*:", "", spec))
  # Distributional parameters beyond the first (mu) that this family supports.
  extra_dpars <- brms::brmsfamily(fam)$dpars[-1]
  return(list(family = fam, dpars = extra_dpars, rhs = growth_rhs))
}
| 1 |
#' @description
#' Internal function for calculating the gamma distribution of the rate parameter in gamma distributed
#' data represented by single value traits.
#' @param s1 A vector of numerics drawn from a uniform distribution.
#' @examples
#' out <- .conj_gamma_sv(
#'   s1 = rgamma(10, 1, 2), cred.int.level = 0.95,
#'   plot = FALSE
#' )
#' lapply(out, head)
#' @keywords internal
#' @noRd
.conj_gamma_sv <- function(s1 = NULL, priors = NULL,
                           plot = FALSE, support = NULL, cred.int.level = NULL,
                           calculatingSupport = FALSE) {
  res <- list()
  #* `make default prior if none provided`
  if (is.null(priors)) {
    priors <- list(shape = 0.5, scale = 0.5, known_shape = 1)
  }
  #* `Update gamma prior with sufficient statistics`
  # Conjugate update for the rate of gamma data with known shape:
  # shape' = known_shape * n + prior shape; scale' = scale / (1 + scale * sum).
  n_obs <- length(s1)
  total <- sum(s1)
  post_shape <- (priors$known_shape * n_obs) + priors$shape
  post_scale <- priors$scale / (1 + (priors$scale * total))
  #* `Define support if it is missing`
  # When asked only for support bounds, return extreme posterior quantiles.
  if (is.null(support) && calculatingSupport) {
    return(qgamma(c(0.0001, 0.9999), shape = post_shape, scale = post_scale))
  }
  #* `Make Posterior Draws`
  res$posteriorDraws <- rgamma(10000, shape = post_shape, scale = post_scale)
  #* `posterior`
  # Discretized, normalized posterior density over the supplied support grid.
  dens <- dgamma(support, shape = post_shape, scale = post_scale)
  norm_pdf <- dens / sum(dens)
  res$pdf <- norm_pdf
  # Highest density estimate (posterior mode) and equal-tailed interval.
  hde <- .gammaHDE(post_shape, post_scale)
  hdi_bounds <- qgamma(
    c((1 - cred.int.level) / 2, (1 - ((1 - cred.int.level) / 2))),
    shape = post_shape, scale = post_scale
  )
  #* `Store summary`
  res$summary <- data.frame(HDE_1 = hde, HDI_1_low = hdi_bounds[1], HDI_1_high = hdi_bounds[2])
  res$posterior <- list(
    "shape" = post_shape, "scale" = post_scale,
    "known_shape" = priors$known_shape
  )
  #* `save s1 data for plotting`
  if (plot) {
    res$plot_df <- data.frame(
      "range" = support,
      "prob" = norm_pdf,
      "sample" = rep("Sample 1", length(support))
    )
  }
  return(res)
}
|
| 58 | ||
| 59 |
#' @description
#' Internal function for calculating the HDE of a gamma distribution
#' @param shape shape parameter
#' @param scale scale parameter
#' @examples
#' .gammaHDE(1, 2)
#' .gammaHDE(0, 1)
#' .gammaHDE(10, 10)
#' @keywords internal
#' @noRd

.gammaHDE <- function(shape, scale) {
  # The gamma mode is undefined (density peaks at zero) for shape < 1.
  if (shape < 1) {
    return(0)
  }
  # Mode of a gamma distribution: (shape - 1) * scale.
  (shape - 1) * scale
}
| 1 |
#' Default theme for ggplots made by pcvr functions.
#'
#' @import ggplot2
#' @importFrom ggplot2 %+replace%
#' @return A ggplot theme
#' @examples
#' ggplot2::ggplot() +
#'   pcv_theme()
#' @export
#'
pcv_theme <- function() {
  # Overrides applied on top of theme_minimal(): right-aligned x labels,
  # visible bottom/left axis lines, and gray facet strips with white text.
  overrides <- ggplot2::theme(
    axis.text.x.bottom = ggplot2::element_text(hjust = 1),
    axis.line.y.left = ggplot2::element_line(),
    axis.line.x.bottom = ggplot2::element_line(),
    strip.background = ggplot2::element_rect(fill = "gray50", color = "gray20"),
    strip.text.x = ggplot2::element_text(size = 14, color = "white"),
    strip.text.y = ggplot2::element_text(size = 14, color = "white")
  )
  ggplot2::theme_minimal() %+replace% overrides
}